Example 1
def BinaryClassification(samples, cluster_sizes, mu, sigma):
    pdf = []
    for i in range(2):
        pdf.append(np.transpose(P.normpdf(samples, mu[i], sigma[i])))

    # classify as cluster 1 where its weighted likelihood beats cluster 0's
    our_classification = P.prod(pdf[0], axis=0) * cluster_sizes[0] < P.prod(
        pdf[1], axis=0) * cluster_sizes[1]
    return our_classification
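Out of context, this snippet assumes np is NumPy and P is a pylab-style namespace exposing normpdf and prod. A minimal usage sketch, with scipy.stats.norm.pdf standing in for P.normpdf (all names below are illustrative, not from the original module):

import numpy as np
from scipy.stats import norm

class P:  # stand-in for the pylab-style namespace used above (assumption)
    normpdf = staticmethod(lambda x, mu, sigma: norm.pdf(x, mu, sigma))
    prod = staticmethod(np.prod)

samples = np.array([[-1.0], [0.2], [2.5]])  # three samples, one feature each
print(BinaryClassification(samples, [0.5, 0.5], mu=[0.0, 2.0], sigma=[1.0, 1.0]))
# -> boolean array, True where cluster 1's weighted likelihood wins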
Example 2
def test3():  # only location1 input
    afr_caps = (
        (-33.925, 18.424),  # Cape Town
        (-26.204, 28.046),  # Joburg
        (-17.864, 31.030),  # Harare
        (30.050, 31.233),  # Cairo
        (-4.325, 15.322),  # Kinshasa
        (8.484, -13.234))  # Freetown

    # http://distancecalculator.globefeed.com
    # distances are from Cape Town and are in km
    known_dists = (
        1261.0,  # Joburg
        2186.6,  # Harare
        7239.0,  # Cairo 
        3305.4,  # Kinshasa
        5776.7)  # Freetown

    hav_dists = haversine(afr_caps)
    dist_diff = abs(hav_dists[0, 1:] - known_dists)

    # Test whether known and haversine distance are within 50km
    if prod(dist_diff < 50.):  # the product is zero if any comparison is False
        print("PASSED: Test 3 (nxn point case)")
    else:
        print("FAILED: Test 3 (nxn point case)")
Example 3
def test2(): # location1 = 1x1; location2 = 1xn
    # Cape Town will be location1
    capetn = (-33.925, 18.424) # Cape Town
    
    # African capitals will be location2
    # coordinates from Google
    afr_caps = ((-17.864, 31.030), # Harare
                ( 30.050, 31.233), # Cairo
                ( -4.325, 15.322), # Kinshasa
                (  8.484,-13.234)) # Freetown
    
    # http://distancecalculator.globefeed.com
    # distances are from Cape Town and are in km
    known_dists = ( 2186.6, # Harare
                    7239.0, # Cairo 
                    3305.4, # Kinshasa
                    5776.7) # Freetown
    
    hav_dists = haversine(capetn, afr_caps) # get haversine output
    dist_diff = abs(known_dists - hav_dists)
    
    # Test whether known and haversine distance are within 50km
    if prod(dist_diff < 50.): # the product is zero if any comparison is False
        print("PASSED: Test 2 (1xn point case)")
    else:
        print("FAILED: Test 2 (1xn point case)")
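Both tests assume a haversine helper that broadcasts one (lat, lon) pair against many. A minimal sketch of the underlying great-circle formula for the 1-to-n case (a hypothetical stand-in, not necessarily the module's implementation):

import numpy as np

EARTH_RADIUS_KM = 6371.0

def haversine_1xn(point, points):
    # great-circle distance (km) from one (lat, lon) pair to each row of points
    lat1, lon1 = np.radians(point)
    lats, lons = np.radians(np.asarray(points)).T
    a = (np.sin((lats - lat1) / 2) ** 2
         + np.cos(lat1) * np.cos(lats) * np.sin((lons - lon1) / 2) ** 2)
    return 2 * EARTH_RADIUS_KM * np.arcsin(np.sqrt(a))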
Example 4
    def _readArray(self):
        """Reads the binary data contained in the file"""
        if type(self.regionSize) == list:
            multi_roi = True
            self.data = {('r' + str(i)): []
                         for i in range(len(self.regionSize))}
        else:
            multi_roi = False
            self.data = []

        for frameNb in range(self.nbOfFrames):
            frameData = self._readAtNumpy(
                self.DATAOFFSET + frameNb * self.frameStride,
                self.frameSize // self.dataType().nbytes,  # integer count of values per frame
                self.dataType)
            if multi_roi:
                val_count = 0
                for idx_ROI, ROI in enumerate(self.regionSize):
                    roi_size = pl.array(self.regionSize[idx_ROI])
                    roi_n_vals = pl.prod(roi_size)
                    roi_name = 'r' + str(idx_ROI)

                    self.data[roi_name].append(frameData[val_count:(val_count+roi_n_vals)]\
                            .reshape(roi_size))
                    val_count += roi_n_vals
            else:
                self.data.append(frameData.reshape(self.regionSize))
        if multi_roi:
            self.data = {k: pl.array(v) for k, v in self.data.items()}
        else:
            self.data = pl.array(self.data)
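The multi-ROI branch above slices each flat frame buffer into consecutive blocks whose lengths are the products of each region's dimensions. The same slicing logic in isolation (hypothetical sizes, no file I/O):

import numpy as np

frame = np.arange(10)          # stand-in flat frame buffer
region_sizes = [(2, 3), (4,)]  # two ROIs: a 2x3 block, then a length-4 block
offset, rois = 0, []
for size in region_sizes:
    n = int(np.prod(size))     # number of values this ROI consumes
    rois.append(frame[offset:offset + n].reshape(size))
    offset += n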
Example 5
    def init(self, img, box):
        img_now = ops.read_image(img)
        self.target_sz = np.array([box[3], box[2]])
        self.pos = np.array([box[1], box[0]]) + self.target_sz / 2
        # print(self.pos)
        # ground_truth =

        # window size, taking padding into account
        self.sz = pylab.floor(self.target_sz * (1 + self.padding))

        # desired output (gaussian shaped), bandwidth proportional to target size
        self.output_sigma = pylab.sqrt(pylab.prod(
            self.target_sz)) * self.output_sigma_factor

        grid_y = pylab.arange(self.sz[0]) - pylab.floor(self.sz[0] / 2)
        grid_x = pylab.arange(self.sz[1]) - pylab.floor(self.sz[1] / 2)
        #[rs, cs] = ndgrid(grid_x, grid_y)
        rs, cs = pylab.meshgrid(grid_x, grid_y)
        y = pylab.exp(-0.5 / self.output_sigma**2 * (rs**2 + cs**2))
        self.yf = pylab.fft2(y)
        # print(self.yf)
        #print("yf.shape ==", yf.shape)
        #print("y.shape ==", y.shape)

        # store pre-computed cosine window
        self.cos_window = pylab.outer(pylab.hanning(self.sz[0]),
                                      pylab.hanning(self.sz[1]))
        if img_now.ndim == 3:
            img_now = ops.rgb2gray(img_now)
        x = ops.get_subwindow(img_now, self.pos, self.sz, self.cos_window)
        k = ops.dense_gauss_kernel(self.sigma, x)
        self.alphaf = pylab.divide(
            self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
        self.z = x
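The regression target built in init is a 2-D Gaussian centered in the window, with bandwidth tied to the target area via prod. The same label map in isolation (a hypothetical 64x48 window and factor 1/16; values are illustrative):

import numpy as np

sz = (64, 48)
output_sigma = np.sqrt(np.prod(sz)) / 16.0
grid_y = np.arange(sz[0]) - sz[0] // 2
grid_x = np.arange(sz[1]) - sz[1] // 2
rs, cs = np.meshgrid(grid_x, grid_y)   # x varies along columns, as above
y = np.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
yf = np.fft.fft2(y)                    # regression targets in the Fourier domain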
Example 7
def gaussiannd(f, a, b, N=5):
    """ Multidimensional Gaussian quadrature.
    Here, f is the integrand, a and b are arrays giving the limits
    of the integral, and N is the number of abscissas. """

    a = pylab.asarray(a)
    b = pylab.asarray(b)
    ndim = a.size
    if a.size == 1:  # use normal 1d Gaussian quadrature
        return gaussian(f, a, b)
    fac = 0.5 * (b - a)
    mid = 0.5 * (b + a)
    s = 0.0
    # loop over all possible ndim-vectors of abscissas
    for xw in itertools.product(abscissas(N), repeat=ndim):
        x = pylab.array([x for (x, _) in xw])
        w = pylab.prod([w for (_, w) in xw])
        s += w * f(fac * x + mid)
    return pylab.prod(fac) * s
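A usage sketch of the same product rule, assuming abscissas(N) yields (node, weight) pairs on [-1, 1]; numpy.polynomial.legendre.leggauss is used here as a stand-in so the example runs on its own:

import itertools
import numpy as np

def abscissas(N):  # stand-in: Gauss-Legendre nodes and weights on [-1, 1]
    return list(zip(*np.polynomial.legendre.leggauss(N)))

f = lambda x: np.exp(-np.sum(x ** 2))  # integrand over [0, 1]^2
a, b = np.array([0.0, 0.0]), np.array([1.0, 1.0])
fac, mid, s = 0.5 * (b - a), 0.5 * (b + a), 0.0
for xw in itertools.product(abscissas(5), repeat=2):
    x = np.array([xi for (xi, _) in xw])
    w = np.prod([wi for (_, wi) in xw])
    s += w * f(fac * x + mid)
print(np.prod(fac) * s)  # ~0.55775, i.e. (sqrt(pi)/2 * erf(1))^2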
Example 8
def argmindist_nz(a, v, low=1e-30):
    """Find the index of the row whose distance from v is smallest."""
    a = a.reshape(len(a), prod(a.shape[1:]))
    v = v.ravel()
    n, m = a.shape
    assert v.shape == (m, )
    if a.dtype == numpy.dtype('float32'):
        assert v.dtype == numpy.dtype('float32')
        return ocrofast.argmindist_nz(n, m, a, v, low)
    else:
        raise Exception("unknown data type")
Example 9
def epsfirst(a, v, eps):
    """Walk down the rows of a and find the first row whose
    Euclidean distance from v is smaller than eps."""
    a = a.reshape(len(a), prod(a.shape[1:]))
    v = v.ravel()
    n, m = a.shape
    assert v.shape == (m, )
    if a.dtype == numpy.dtype('float32'):
        assert v.dtype == numpy.dtype('float32')
        return ocrofast.epsfirst(n, m, a, v, eps)
    else:
        raise Exception("unknown data type")
Example 10
def dlfirst(a, eps, v):
    """Walk down the rows of a and find the first row whose
    Euclidean distance from v is smaller than the corresponding
    value in eps."""
    a = a.reshape(len(a), prod(a.shape[1:]))
    eps = eps.ravel()
    v = v.ravel()
    n, m = a.shape
    assert eps.shape == (n, )
    assert v.shape == (m, )
    if a.dtype == numpy.dtype('float32'):
        assert eps.dtype == numpy.dtype('float32')
        assert v.dtype == numpy.dtype('float32')
        return ocrofast.argmindist(n, m, a, eps, v)
    else:
        raise Exception("unknown data type")
Example 11
def alldists(a, v, out=None):
    """Compute the Euclidean distance from v to each row of a."""
    a = a.reshape(len(a), prod(a.shape[1:]))
    v = v.ravel()
    n, m = a.shape
    assert v.shape == (m, )
    if a.dtype == numpy.dtype('float32'):
        if out is None: out = numpy.zeros(n, 'f')
        assert v.dtype == numpy.dtype('float32')
        assert out.dtype == numpy.dtype('float32')
        ocrofast.alldists(n, m, out, a, v)
        return out
    elif a.dtype == numpy.dtype('float64'):
        if out is None: out = numpy.zeros(n, 'd')
        assert v.dtype == numpy.dtype('float64')
        assert out.dtype == numpy.dtype('float64')
        ocrofast.alldists_d(n, m, out, a, v)
        return out
    else:
        raise Exception("unknown data type")
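These wrappers all flatten the trailing dimensions with prod and then dispatch to compiled ocrofast kernels. A pure-NumPy reference for alldists (slower, but it shows the contract the kernels satisfy):

import numpy

def alldists_numpy(a, v):
    # Euclidean distance from v to every row of a, any float dtype
    a = a.reshape(len(a), int(numpy.prod(a.shape[1:])))
    return numpy.sqrt(((a - v.ravel()) ** 2).sum(axis=1))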
Example 12
def alldists_sig1(a, s, v, out=None):
    """Find all the Mahalanobis distances of v from the rows of a, using s as a
    single diagonal covariance matrix for all comparisons."""
    a = a.reshape(len(a), prod(a.shape[1:]))
    v = v.ravel()
    n, m = a.shape
    assert v.shape == (m, )
    assert s.shape == (m, )
    if a.dtype == numpy.dtype('float32'):
        if out is None: out = numpy.zeros(n, 'f')
        assert v.dtype == numpy.dtype('float32')
        assert out.dtype == numpy.dtype('float32')
        ocrofast.alldists_sig1(n, m, out, a, s, v)
        return out
    elif a.dtype == numpy.dtype('float64'):
        if out is None: out = numpy.zeros(n, 'd')
        assert v.dtype == numpy.dtype('float64')
        assert out.dtype == numpy.dtype('float64')
        ocrofast.alldists_sig1_d(n, m, out, a, s, v)
        return out
    else:
        raise Exception("unknown data type")
Example 13
    def initialize(self, image, pos, target_sz):
        if len(image.shape) == 3 and image.shape[2] > 1:
            image = rgb2gray(image)
        self.image = image
        if self.should_resize_image:
            self.image = scipy.misc.imresize(self.image, 0.5)
            self.image = self.image / 255.0

        # window size, taking padding into account
        self.sz = pylab.floor(target_sz * (1 + self.padding))
        self.pos = pos

        # desired output (gaussian shaped), bandwidth proportional to target size
        output_sigma = pylab.sqrt(pylab.prod(
            self.sz)) * self.output_sigma_factor

        grid_y = pylab.arange(self.sz[0]) - pylab.floor(self.sz[0] / 2)
        grid_x = pylab.arange(self.sz[1]) - pylab.floor(self.sz[1] / 2)
        #[rs, cs] = ndgrid(grid_x, grid_y)
        rs, cs = pylab.meshgrid(grid_x, grid_y)
        self.y = pylab.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
        self.yf = pylab.fft2(self.y)

        # store pre-computed cosine window
        self.cos_window = pylab.outer(pylab.hanning(self.sz[0]),
                                      pylab.hanning(self.sz[1]))

        # get subwindow at current estimated target position,
        # to train the classifier
        x = get_subwindow(self.image, self.pos, self.sz, self.cos_window)

        # Kernel Regularized Least-Squares,
        # calculate alphas (in Fourier domain)
        k = dense_gauss_kernel(self.sigma, x)
        self.alphaf = pylab.divide(
            self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
        self.z = x

        return
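Eq. 7 above is the closed-form kernel ridge regression solve, carried out elementwise in the Fourier domain. The same step in isolation, with random stand-ins for the targets and the kernel map:

import numpy as np

yf = np.fft.fft2(np.random.rand(32, 32))  # stand-in regression targets
k = np.random.rand(32, 32)                # stand-in output of dense_gauss_kernel
lambda_value = 1e-2                       # regularization, as above
alphaf = yf / (np.fft.fft2(k) + lambda_value)  # Eq. 7, elementwise division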
Example 16
def montecarlo(f, a, b, eps=1e-3, nmin=100, nmax=1000000):
    """ Monte Carlo integration.
    Here, f is the integrand, a and b are arrays giving the limits
    of the integral, and eps is the desired accuracy.
    The parameters nmin and nmax specify the minimum and
    maximum number of random points to use. """

    a = pylab.asarray(a)
    b = pylab.asarray(b)
    vol = pylab.prod(b - a)
    s = 0.0  # running average of f(x)
    ssq = 0.0  # running sum of (f(x)-s)**2
    n = 0
    while n < nmax:
        n += 1
        x = pylab.uniform(a, b)
        fx = f(x)
        d = fx - s
        s += d / n
        ssq += d * (fx - s)
        err = ssq**0.5 / n  # assume n-1 ~= n
        if n > nmin and err < eps * abs(s):
            break
    return vol * s
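A usage sketch, assuming pylab.uniform draws each coordinate between the matching entries of a and b (NumPy's uniform broadcasts array bounds this way). The integrand x*y over [0, 1]^2 integrates to exactly 1/4:

f = lambda x: x[0] * x[1]
print(montecarlo(f, [0.0, 0.0], [1.0, 1.0], eps=1e-3))  # close to 0.25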
Example 17
def binarize_image(job):
    image_object, i = job
    raw = read_image_gray(image_object)
    image = raw - amin(raw)
    if amax(image) == amin(image):
        return  # Image is empty
    image /= amax(image)
    check = check_page(amax(image) - image)
    if check is not None:
        return
    if args.gray:
        extreme = 0
    else:
        extreme = (sum(image < 0.05) + sum(image > 0.95)) * 1.0 / prod(
            image.shape)

    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        m = interpolation.zoom(image, args.zoom)
        m = filters.percentile_filter(m, args.perc, size=(args.range, 2))
        m = filters.percentile_filter(m, args.perc, size=(2, args.range))
        m = interpolation.zoom(m, 1.0 / args.zoom)
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    if args.maxskew > 0:
        d0, d1 = flat.shape
        o0, o1 = int(args.bignore * d0), int(args.bignore * d1)
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = args.maxskew
        ms = int(2 * args.maxskew * args.skewsteps)
        angle = estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0

    d0, d1 = flat.shape
    o0, o1 = int(args.bignore * d0), int(args.bignore * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]

    if args.escale > 0:
        e = args.escale
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), args.lo)
    hi = stats.scoreatpercentile(est.ravel(), args.hi)
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    binary = 1 * (flat > args.threshold)
    return (binary, flat)
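The closing normalization rescales the flattened image by robust percentiles before thresholding. The same step in isolation, with hypothetical values standing in for args.lo, args.hi, and args.threshold:

import numpy as np
from scipy import stats

flat = np.random.rand(100, 100)                 # stand-in flattened image
lo = stats.scoreatpercentile(flat.ravel(), 5)   # args.lo = 5 (assumption)
hi = stats.scoreatpercentile(flat.ravel(), 90)  # args.hi = 90 (assumption)
flat = np.clip((flat - lo) / (hi - lo), 0, 1)
binary = 1 * (flat > 0.5)                       # args.threshold = 0.5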
Example 19
    def init(self, img, rect):
        im_width = img.shape[1]
        im_height = img.shape[0]
        ys = pylab.floor(rect[1]) + pylab.arange(rect[3], dtype=int)
        xs = pylab.floor(rect[0]) + pylab.arange(rect[2], dtype=int)
        ys = ys.astype(int)
        xs = xs.astype(int)
        # check for out-of-bounds coordinates,
        # and set them to the values at the borders
        ys[ys < 0] = 0
        ys[ys >= img.shape[0]] = img.shape[0] - 1

        xs[xs < 0] = 0
        xs[xs >= img.shape[1]] = img.shape[1] - 1

        self.rect = rect  # bounding box of the target
        # pos is the center position of the tracked object (cy, cx)
        self.pos = pylab.array([rect[1] + rect[3] / 2, rect[0] + rect[2] / 2])
        self.posOffset = np.array([0, 0], int)
        self.tlx = rect[0]
        self.tly = rect[1]
        self.trackNo = 0
        # parameters according to the paper --

        padding = 1.0  # extra area surrounding the target (window enlarged 2x by default)
        # spatial bandwidth (proportional to target)
        output_sigma_factor = 1 / float(16)
        self.sigma = 0.2  # gaussian kernel bandwidth
        self.lambda_value = 1e-2  # regularization
        # linear interpolation factor for adaptation
        self.interpolation_factor = 0.075

        # target_sz equals [rect[3], rect[2]]
        target_sz = pylab.array([int(rect[3]), int(rect[2])])
        # window size(Extended window size), taking padding into account
        window_sz = pylab.floor(target_sz * (1 + padding))

        self.window_sz = window_sz
        self.target_sz = target_sz

        # desired output (gaussian shaped), bandwidth proportional to target size
        output_sigma = pylab.sqrt(pylab.prod(target_sz)) * output_sigma_factor

        grid_y = pylab.arange(window_sz[0]) - pylab.floor(window_sz[0] / 2)
        grid_x = pylab.arange(window_sz[1]) - pylab.floor(window_sz[1] / 2)
        # [rs, cs] = ndgrid(grid_x, grid_y)
        rs, cs = pylab.meshgrid(grid_x, grid_y)
        y = pylab.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
        self.yf = pylab.fft2(y)
        # store pre-computed cosine window
        self.cos_window = pylab.outer(pylab.hanning(window_sz[0]),
                                      pylab.hanning(window_sz[1]))

        # get subwindow at current estimated target position, to train the classifier
        x = self.get_subwindow(img, self.pos, window_sz, self.cos_window)
        # Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
        k = self.dense_gauss_kernel(self.sigma, x)
        #storing computed alphaf and z for next frame iteration
        self.alphaf = pylab.divide(
            self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
        self.z = x

        # monitor the tracker's own status, based on the continuity of the PSR
        self.self_status = 0
        # monitor the collaborative status, based on the distance to the voted
        # object bounding box center, and also on the PSR
        self.collaborate_status = 5

        self.collabor_container = np.ones((10, 1), int)
        self.highpsr_container = np.ones((10, 1), int)
        self.FourRecentRects = np.zeros((4, 4), float)
        #return initialization status
        return True
Example 20
def track(input_video_path):
    """
    notation: variables ending with f are in the frequency domain.
    """

    # parameters according to the paper --
    padding = 1.0  # extra area surrounding the target
    #spatial bandwidth (proportional to target)
    output_sigma_factor = 1 / float(16)
    sigma = 0.2  # gaussian kernel bandwidth
    lambda_value = 1e-2  # regularization
    # linear interpolation factor for adaptation
    interpolation_factor = 0.075

    info = load_video_info(input_video_path)
    img_files, pos, target_sz, \
        should_resize_image, ground_truth, video_path = info

    # window size, taking padding into account
    sz = pylab.floor(target_sz * (1 + padding))

    # desired output (gaussian shaped), bandwidth proportional to target size
    output_sigma = pylab.sqrt(pylab.prod(target_sz)) * output_sigma_factor

    grid_y = pylab.arange(sz[0]) - pylab.floor(sz[0]/2)
    grid_x = pylab.arange(sz[1]) - pylab.floor(sz[1]/2)
    #[rs, cs] = ndgrid(grid_x, grid_y)
    rs, cs = pylab.meshgrid(grid_x, grid_y)
    y = pylab.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
    yf = pylab.fft2(y)
    #print("yf.shape ==", yf.shape)
    #print("y.shape ==", y.shape)

    # store pre-computed cosine window
    cos_window = pylab.outer(pylab.hanning(sz[0]),
                             pylab.hanning(sz[1]))

    total_time = 0  # to calculate FPS
    positions = pylab.zeros((len(img_files), 2))  # to calculate precision

    global z, response
    z = None
    alphaf = None
    response = None

    for frame, image_filename in enumerate(img_files):

        if True and ((frame % 10) == 0):
            print("Processing frame", frame)

        # load image
        image_path = os.path.join(video_path, image_filename)

        im = pylab.imread(image_path)
        if len(im.shape) == 3 and im.shape[2] > 1:
            im = rgb2gray(im)

        #print("Image max/min value==", im.max(), "/", im.min())

        if should_resize_image:
            im = scipy.misc.imresize(im, 0.5)

        start_time = time.time()

        # extract and pre-process subwindow
        x = get_subwindow(im, pos, sz, cos_window)

        if debug:
            pylab.figure()
            pylab.imshow(x)
            pylab.title("sub window")

        is_first_frame = (frame == 0)

        if not is_first_frame:
            # calculate response of the classifier at all locations
            k = dense_gauss_kernel(sigma, x, z)
            kf = pylab.fft2(k)
            alphaf_kf = pylab.multiply(alphaf, kf)
            response = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

            # target location is at the maximum response
            r = response
            row, col = pylab.unravel_index(r.argmax(), r.shape)
            pos = pos - pylab.floor(sz/2) + [row, col]

            if debug:
                print("Frame ==", frame)
                print("Max response", r.max(), "at", [row, col])
                pylab.figure()
                pylab.imshow(cos_window)
                pylab.title("cos_window")

                pylab.figure()
                pylab.imshow(x)
                pylab.title("x")

                pylab.figure()
                pylab.imshow(response)
                pylab.title("response")
                pylab.show(block=True)

        # end "if not first frame"

        # get subwindow at current estimated target position,
        # to train the classifier
        x = get_subwindow(im, pos, sz, cos_window)

        # Kernel Regularized Least-Squares,
        # calculate alphas (in Fourier domain)
        k = dense_gauss_kernel(sigma, x)
        new_alphaf = pylab.divide(yf, (pylab.fft2(k) + lambda_value))  # Eq. 7
        new_z = x

        if is_first_frame:
            #first frame, train with a single image
            alphaf = new_alphaf
            z = x
        else:
            # subsequent frames, interpolate model
            f = interpolation_factor
            alphaf = (1 - f) * alphaf + f * new_alphaf
            z = (1 - f) * z + f * new_z
        # end "first frame or not"

        # save position and calculate FPS
        positions[frame, :] = pos
        total_time += time.time() - start_time

        # visualization
        plot_tracking(frame, pos, target_sz, im, ground_truth)
    # end of "for each image in video"

    if should_resize_image:
        positions = positions * 2

    print("Frames-per-second:",  len(img_files) / total_time)

    title = os.path.basename(os.path.normpath(input_video_path))

    if len(ground_truth) > 0:
        # show the precisions plot
        show_precision(positions, ground_truth, video_path, title)

    return
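The adaptation step at the end of the loop is a fixed-rate exponential moving average over both the model coefficients and the template. The same update in isolation (hypothetical arrays):

import numpy as np

f = 0.075                                   # interpolation_factor, as above
alphaf = np.zeros((4, 4), complex)          # stand-in current model
new_alphaf = np.ones((4, 4), complex)       # stand-in model from this frame
alphaf = (1 - f) * alphaf + f * new_alphaf  # old model decays geometrically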
Example 21
def track(input_video_path, show_tracking):
    """
    Note: variables ending in f are in the frequency domain.
    """

    # extra area surrounding the target
    padding = 1.0
    # spatial bandwidth, proportional to the target size
    output_sigma_factor = 1 / float(16)
    # gaussian kernel bandwidth
    sigma = 0.2
    # regularization coefficient
    lambda_value = 1e-2
    # linear interpolation factor
    interpolation_factor = 0.075
    # load the video info: the list of frame images to test, the center [y, x]
    # of the first frame's target box, the target box size (height, width),
    # whether to halve the image resolution, the per-frame ground truth, and
    # the video path
    info = load_video_info.load_video_info(input_video_path)
    img_files, pos, target_sz, should_resize_image, ground_truth, video_path = info

    # window size, taking the padding into account
    sz = pylab.floor(target_sz * (1 + padding))

    # desired gaussian-shaped output, bandwidth proportional to the target size
    output_sigma = pylab.sqrt(pylab.prod(target_sz)) * output_sigma_factor
    # shift the window's height and width coordinate ranges so that
    # they are centered on the origin
    grid_y = pylab.arange(sz[0]) - pylab.floor(sz[0] / 2)
    grid_x = pylab.arange(sz[1]) - pylab.floor(sz[1] / 2)
    # turn the coordinate lists into coordinate matrices, i.e. grid the 2-D plane
    rs, cs = pylab.meshgrid(grid_x, grid_y)
    # Eq. (19) in the paper: values in [0, 1], largest at the center and
    # decaying away from it
    y = pylab.exp((-0.5 / output_sigma ** 2) * (rs ** 2 + cs ** 2))
    # 2-D discrete Fourier transform
    yf = pylab.fft2(y)

    # build a Hanning (weighted cosine) window for the window height, another
    # for the width, and take their outer product to get the 2-D cosine window
    cos_window = pylab.outer(pylab.hanning(sz[0]), pylab.hanning(sz[1]))
    total_time = 0  # to calculate FPS
    positions = pylab.zeros((len(img_files), 2))  # to calculate precision

    # global z, response
    plot_tracking.z = None
    alphaf = None
    plot_tracking.response = None
    # iterate over the frames in the image-name list
    for frame, image_filename in enumerate(img_files):
        if (frame % 10) == 0:
            print("Processing frame", frame)
        # load the image
        image_path = os.path.join(video_path, image_filename)
        im = pylab.imread(image_path)
        # convert color images to grayscale
        if len(im.shape) == 3 and im.shape[2] > 1:
            im = rgb2gray.rgb2gray(im)
        # if requested, downscale the image to half size
        if should_resize_image:
            # note: PIL's resize takes (width, height)
            im = np.array(Image.fromarray(im).resize((im.shape[1] // 2, im.shape[0] // 2)))

        # start timing
        start_time = time.time()

        # extract and pre-process the sub-window, applying the cosine window
        x = get_subwindow.get_subwindow(im, pos, sz, cos_window)

        is_first_frame = (frame == 0)
        # for every frame after the first, compute the classifier response
        if not is_first_frame:
            # calculate the response of the classifier at all locations
            k = dense_gauss_kernel.dense_gauss_kernel(sigma, x, plot_tracking.z)
            kf = pylab.fft2(k)
            alphaf_kf = pylab.multiply(alphaf, kf)
            plot_tracking.response = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

            # the target location is at the maximum response
            r = plot_tracking.response
            row, col = pylab.unravel_index(r.argmax(), r.shape)
            pos = pos - pylab.floor(sz / 2) + [row, col]

            if debug:
                print("Frame ==", frame)
                print("Max response", r.max(), "at", [row, col])
                pylab.figure()
                pylab.imshow(cos_window)
                pylab.title("cos_window")

                pylab.figure()
                pylab.imshow(x)
                pylab.title("x")

                pylab.figure()
                pylab.imshow(plot_tracking.response)
                pylab.title("response")
                pylab.show(block=True)

        # end "if not first frame"

        # get the cosine-windowed subwindow at the target position to train the classifier
        x = get_subwindow.get_subwindow(im, pos, sz, cos_window)

        # Kernel Regularized Least-Squares: compute alpha in the Fourier domain
        k = dense_gauss_kernel.dense_gauss_kernel(sigma, x)
        new_alphaf = pylab.divide(yf, (pylab.fft2(k) + lambda_value))  # Eq. 7
        new_z = x

        if is_first_frame:
            # first frame: train on a single image
            alphaf = new_alphaf
            plot_tracking.z = x
        else:
            # subsequent frames: interpolate the model parameters
            f = interpolation_factor
            alphaf = (1 - f) * alphaf + f * new_alphaf
            plot_tracking.z = (1 - f) * plot_tracking.z + f * new_z

        # save the current position and accumulate the time for FPS
        positions[frame, :] = pos
        total_time += time.time() - start_time

        # visualize the tracking result
        if show_tracking == "yes":
            plot_tracking.plot_tracking(frame, pos, target_sz, im, ground_truth)

    if should_resize_image:
        positions = positions * 2

    print("Frames-per-second:", len(img_files) / total_time)

    title = os.path.basename(os.path.normpath(input_video_path))

    if len(ground_truth) > 0:
        # show the precision plot
        show_precision.show_precision(positions, ground_truth, title)
Example 22
    def init(self, img, rect ):
        im_width = img.shape[1]
        im_height = img.shape[0]
        ys = pylab.floor(rect[1]) + pylab.arange(rect[3], dtype=int)
        xs = pylab.floor(rect[0]) + pylab.arange(rect[2], dtype=int)
        ys = ys.astype(int)
        xs = xs.astype(int)
        # check for out-of-bounds coordinates,
        # and set them to the values at the borders
        ys[ys < 0] = 0
        ys[ys >= img.shape[0]] = img.shape[0] - 1

        xs[xs < 0] = 0
        xs[xs >= img.shape[1]] = img.shape[1] - 1
        roi = self.get_imageROI(img, rect)

        self.init_frame = img.copy()
        self.canvas     = img.copy()
        # pos is the center position of the tracked object (cy, cx)
        pos = pylab.array([rect[1] + rect[3]/2, rect[0] + rect[2]/2])
        self.pos_list   = [pos]
        self.roi_list   = [roi]
        self.rect_list  = [rect]
        self.trackNo    = 0
        # parameters according to the paper --

        padding = 1.0  # extra area surrounding the target (window enlarged 2x by default)
        # spatial bandwidth (proportional to target)
        output_sigma_factor = 1 / float(16)
        self.sigma = 0.2  # gaussian kernel bandwidth
        self.lambda_value = 1e-2  # regularization
        # linear interpolation factor for adaptation
        #self.interpolation_factor = 0.075
        self.interpolation_factor = 0.01

        self.scale_ratios = [0.985, 0.99, 0.995, 1.0, 1.005, 1.01, 1.015]


        # target_sz equals [rect[3], rect[2]]
        target_sz = pylab.array([int(rect[3]), int(rect[2])])
        # window size(Extended window size), taking padding into account
        window_sz = pylab.floor(target_sz * (1 + padding))

        self.window_sz = window_sz
        self.window_sz_new = window_sz
        self.target_sz = target_sz

        # desired output (gaussian shaped), bandwidth proportional to target size
        output_sigma = pylab.sqrt(pylab.prod(target_sz)) * output_sigma_factor

        grid_y = pylab.arange(window_sz[0]) - pylab.floor(window_sz[0] / 2)
        grid_x = pylab.arange(window_sz[1]) - pylab.floor(window_sz[1] / 2)
        # [rs, cs] = ndgrid(grid_x, grid_y)
        rs, cs = pylab.meshgrid(grid_x, grid_y)
        y = pylab.exp(-0.5 / output_sigma ** 2 * (rs ** 2 + cs ** 2))
        self.yf= pylab.fft2(y)
        # store pre-computed cosine window
        self.cos_window = pylab.outer(pylab.hanning(window_sz[0]), pylab.hanning(window_sz[1]))


        # get subwindow at current estimated target position, to train the classifier
        x = self.get_subwindow(img, pos, window_sz)
        # Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
        k = self.dense_gauss_kernel(self.sigma, x)
        #storing computed alphaf and z for next frame iteration
        self.alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
        self.z = x

        #return initialization status
        return True
Example 23
def load_video_info(video_path):
    """
    Loads all the information related to the video at the given path: the list
    of frame image names, the center position of the first frame's target box
    (1x2), the target box size (1x2), whether to halve the video resolution
    (boolean), the ground truth used for precision calculations (Nx2, where N
    is the number of frames), and the video path. Coordinates are ordered
    [y, x]; box sizes are ordered height, width.
    :param video_path: path to the video or image sequence
    :return: the information the tracker needs, as described above
    """

    # locate the ground truth file (MILTrack's format, ending in _gt.txt); it
    # holds the target center coordinates and the box width/height values
    # text_files is a list whose first element is the full ground truth path
    text_files = glob.glob(os.path.join(video_path, "*_gt.txt"))
    # if the file does not exist, raise an error message
    assert text_files, "No initial position and ground truth (*_gt.txt) to load."
    # full path to the ground truth file
    first_file_path = text_files[0]
    # read the comma-separated ground truth values
    # each row is one frame and holds "x, y, width, height"; note that only 1
    # in every 5 frames is annotated, and the rest are filled with zeros
    # x, y is the corner of the target box; width, height give its size
    ground_truth = pylab.loadtxt(first_file_path, delimiter=",")
    # use the first frame's entry to initialize the target
    first_ground_truth = ground_truth[0, :]
    # height and width of the first frame's target box
    target_sz = pylab.array([first_ground_truth[3], first_ground_truth[2]])
    # center coordinates of the first frame's target box, [y_center, x_center]
    pos = [first_ground_truth[1], first_ground_truth[0]] + pylab.floor(
        target_sz / 2)
    # if the ground truth is non-empty, interpolate the remaining frames'
    # positions and convert them to target-center coordinates
    if ground_truth is not None:
        # interpolate the x, y, width, height columns one at a time,
        # rather than row by row
        for i in range(4):
            # xp holds the indices of the annotated (non-zero) rows
            xp = range(0, ground_truth.shape[0], 5)
            # fp holds this column's values at the annotated rows
            fp = ground_truth[xp, i]
            # x holds the indices of all frames
            x = range(ground_truth.shape[0])
            # numpy's 1-D linear interpolation fills in the zeroed entries
            ground_truth[:, i] = pylab.interp(x, xp, fp)
        # convert corner coordinates plus width/height into center coordinates
        ground_truth = ground_truth[:, [1, 0]] + ground_truth[:, [3, 2]] / 2
    # if there is no ground truth file, report it and leave it empty
    else:
        print("Failed to gather ground truth data")
        ground_truth = []
    # list all frames: first try to read the start and end frame numbers from
    # "*_frames.txt"; if that file is missing, list the images in the folder
    # text_files is the list of all files ending in "_frames.txt"
    text_files = glob.glob(os.path.join(video_path, "*_frames.txt"))
    # if the start/end frame file exists
    if text_files:
        # assume a single "_frames.txt" file recording the start and end frames
        first_file_path = text_files[0]
        # read the start and end frame numbers
        frames = pylab.loadtxt(first_file_path, delimiter=",", dtype=int)
        # test whether the first frame lives in the "imgs" image folder
        # %05i formats an integer to 5 digits, zero-padded on the left
        test1_path_to_img = os.path.join(video_path,
                                         "imgs/img%05i.png" % frames[0])
        test2_path_to_img = os.path.join(video_path, "img%05i.png" % frames[0])
        # the images are in the "imgs" subfolder
        if os.path.exists(test1_path_to_img):
            video_path = os.path.join(video_path, "imgs/")
        # the images are directly under video_path
        elif os.path.exists(test2_path_to_img):
            video_path = video_path
        # the images are in neither place, so raise an error
        else:
            raise Exception("Failed to find the png images")
        # build the list of all frame images
        img_files = [
            "img%05i.png" % i for i in range(frames[0], frames[1] + 1)
        ]
    # if there is no "_frames.txt" file, read the images from video_path directly
    else:
        # gather the "png" files under video_path into a list
        img_files = glob.glob(os.path.join(video_path, "*.png"))
        # if there are no "png" files, look for "jpg" files instead
        if len(img_files) == 0:
            img_files = glob.glob(os.path.join(video_path, "*.jpg"))
        # if neither image type exists, raise an error
        assert len(img_files), "Failed to find png or jpg images"
        # sort the image list lexicographically
        img_files.sort()
    # if the initial target box is too large (more than 100 pixels on a side),
    # halve everything, i.e. work at a lower resolution
    if pylab.sqrt(pylab.prod(target_sz)) >= 100:
        pos = pylab.floor(pos / 2)
        target_sz = pylab.floor(target_sz / 2)
        resize_image = True
    # otherwise no resizing is needed
    else:
        resize_image = False
    # return the video information: the list of frame images, the first frame's
    # target box center [y_center, x_center], the target box size, whether to
    # resize the images, the ground truth for the test frames, and the video path
    return [img_files, pos, target_sz, resize_image, ground_truth, video_path]
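The interpolation loop above fills the 4-in-5 zeroed annotation rows column by column. A minimal demonstration on a hypothetical single-column track annotated every 5th frame:

import numpy as np

col = np.zeros(10)
col[::5] = [3.0, 8.0]             # only frames 0 and 5 are annotated
xp = np.arange(0, len(col), 5)    # indices of the annotated frames
col[:] = np.interp(np.arange(len(col)), xp, col[xp])
# frames 1-4 now ramp linearly from 3.0 to 8.0; frames 6-9 hold at 8.0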
Example 24
def load_video_info(video_path):
    """
    Loads all the relevant information for the video in the given path:
    the list of image files (cell array of strings), initial position
    (1x2), target size (1x2), whether to resize the video to half
    (boolean), and the ground truth information for precision calculations
    (Nx2, for N frames). The ordering of coordinates is always [y, x].

    The path to the video is returned, since it may change if the images
    are located in a sub-folder (as is the default for MILTrack's videos).
    """

    # load ground truth from text file (MILTrack's format)
    text_files = glob.glob(os.path.join(video_path, "*_gt.txt"))
    assert text_files, \
        "No initial position and ground truth (*_gt.txt) to load."

    first_file_path = text_files[0]
    #f = open(first_file_path, "r")
    #ground_truth = textscan(f, '%f,%f,%f,%f') # [x, y, width, height]
    #ground_truth = cat(2, ground_truth{:})
    ground_truth = pylab.loadtxt(first_file_path, delimiter=",")
    #f.close()

    # set initial position and size
    first_ground_truth = ground_truth[0, :]
    # target_sz contains height, width
    target_sz = pylab.array([first_ground_truth[3], first_ground_truth[2]])
    # pos contains y, x center
    pos = [first_ground_truth[1], first_ground_truth[0]] \
        + pylab.floor(target_sz / 2)

    #try:
    if True:
        # interpolate missing annotations
        # 4 out of each 5 frames is filled with zeros
        for i in range(4):  # x, y, width, height
            xp = range(0, ground_truth.shape[0], 5)
            fp = ground_truth[xp, i]
            x = range(ground_truth.shape[0])
            ground_truth[:, i] = pylab.interp(x, xp, fp)
        # store positions instead of boxes
        ground_truth = ground_truth[:, [1, 0]] + ground_truth[:, [3, 2]] / 2
    #except Exception as e:
    else:
        print("Failed to gather ground truth data")
        #print("Error", e)
        # ok, wrong format or we just don't have ground truth data.
        ground_truth = []

    # list all frames. first, try MILTrack's format, where the initial and
    # final frame numbers are stored in a text file. if it doesn't work,
    # try to load all png/jpg files in the folder.

    text_files = glob.glob(os.path.join(video_path, "*_frames.txt"))
    if text_files:
        first_file_path = text_files[0]
        #f = open(first_file_path, "r")
        #frames = textscan(f, '%f,%f')
        frames = pylab.loadtxt(first_file_path, delimiter=",", dtype=int)
        #f.close()

        # see if they are in the 'imgs' subfolder or not
        test1_path_to_img = os.path.join(video_path,
                                         "imgs/img%05i.png" % frames[0])
        test2_path_to_img = os.path.join(video_path,
                                         "img%05i.png" % frames[0])
        if os.path.exists(test1_path_to_img):
            video_path = os.path.join(video_path, "imgs/")
        elif os.path.exists(test2_path_to_img):
            video_path = video_path  # no need for change
        else:
            raise Exception("Failed to find the png images")

        # list the files
        img_files = ["img%05i.png" % i
                     for i in range(frames[0], frames[1] + 1)]
        #img_files = num2str((frames{1} : frames{2})', 'img%05i.png')
        #img_files = cellstr(img_files);
    else:
        # no text file, just list all images
        img_files = glob.glob(os.path.join(video_path, "*.png"))
        if len(img_files) == 0:
            img_files = glob.glob(os.path.join(video_path, "*.jpg"))

        assert len(img_files), "Failed to find png or jpg images"

        img_files.sort()

    # if the target is too large, use a lower resolution
    # no need for so much detail
    if pylab.sqrt(pylab.prod(target_sz)) >= 100:
        pos = pylab.floor(pos / 2)
        target_sz = pylab.floor(target_sz / 2)
        resize_image = True
    else:
        resize_image = False

    ret = [img_files, pos, target_sz, resize_image, ground_truth, video_path]
    return ret
Example 25
def track(descriptor):
    global options
    desc_channel_count = descriptor.initialize(options.use_gpu)

    roi = loader.track_bounding_box_from_first_frame()
    roi = [
        roi[0] + roi[2] / 2, roi[1] + roi[3] / 2, roi[2], roi[3],
        roi[2] * (1 + kcf_params.padding), roi[3] * (1 + kcf_params.padding)
    ]

    output_sigma = pylab.sqrt(pylab.prod([roi[3], roi[2]
                                          ])) * kcf_params.output_sigma_factor

    avg_count = 0

    global cos_window
    cos_window = None
    template = [None for i in range(desc_channel_count)]
    alpha_f = [None for i in range(desc_channel_count)]
    response = [None for i in range(desc_channel_count)]
    yf = None

    track_time = 0
    full_track_time = time.time()
    while loader.has_next_frame():
        im = loader.next_frame()

        if (loader.frame_number() % 10) == 0:
            print("Processing frame {}".format(loader.frame_number()))

        start_time = time.time()

        is_first_frame = loader.frame_number() == 0

        cropped = get_subwindow(im, roi)
        channels = descriptor.describe(cropped)
        subwindow = apply_cos_window(channels)
        subwindow = crop(subwindow)
        dmv = None

        if is_first_frame:
            grid_y = pylab.arange(subwindow.shape[1]) - pylab.floor(
                subwindow.shape[1] / 2)
            grid_x = pylab.arange(subwindow.shape[2]) - pylab.floor(
                subwindow.shape[2] / 2)

            rs, cs = pylab.meshgrid(grid_x, grid_y)
            y = pylab.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
            yf = pylab.fft2(y)
        else:

            for i in range(0, subwindow.shape[0]):
                channel = subwindow[i, :, :]

                # calculate response of the classifier at all locations
                k = dense_gauss_kernel(kcf_params.sigma, channel, template[i])
                kf = pylab.fft2(k)
                alphaf_kf = pylab.multiply(alpha_f[i], kf)
                response[i] = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

                # argmax = response[i].argmax()
                #
                # if response[i].item(argmax) != 0:
                #     tmp = pylab.unravel_index(argmax, response[i].shape)
                #     if value < response[i][tmp[0],tmp[1]]:
                #         avg_x = tmp[1]
                #         avg_y = tmp[0]
                #         avg_count = 1
                #         value = response[i][tmp[0],tmp[1]]
                #         chosen_i = i

            anchor = torch.tensor(channels[:, channels.shape[1] // 2,
                                           channels.shape[2] // 2]).unsqueeze(0)
            points = torch.tensor(response).view(channels.shape[0], -1).t()

            dmv = distance_matrix_vector(anchor,
                                         points).view(channels.shape[1],
                                                      channels.shape[2])

            argmax = np.array(dmv).argmax()
            tmp = pylab.unravel_index(argmax, subwindow.shape[1:])
            moved_by = [
                float(tmp[0]) - float(subwindow.shape[1]) / 2,
                float(tmp[1]) - float(subwindow.shape[2]) / 2
            ]
            roi = descriptor.update_roi(roi, moved_by)

        cropped = get_subwindow(im, roi)
        channels = descriptor.describe(cropped)
        subwindow = apply_cos_window(channels)
        subwindow = crop(subwindow)

        for i in range(0, subwindow.shape[0]):

            channel = subwindow[i, :, :]

            k = dense_gauss_kernel(kcf_params.sigma, channel)
            new_alpha_f = pylab.divide(
                yf, (pylab.fft2(k) + kcf_params.lambda_value))  # Eq. 7
            new_template = channel

            if is_first_frame:
                alpha_f[i] = new_alpha_f
                template[i] = new_template
            else:
                f = kcf_params.interpolation_factor
                alpha_f[i] = (1 - f) * alpha_f[i] + f * new_alpha_f
                template[i] = (1 - f) * template[i] + f * new_template

        track_time += time.time() - start_time

        results.log_tracked(im, roi, False, template[0], dmv)
    # end of "for each image in video"

    results.log_meta("speed.frames_tracked", loader.frame_number())
    results.log_meta("speed.track_no_io_time", str(track_time) + "s")
    results.log_meta("speed.track_no_io_fps",
                     loader.frame_number() / track_time)
    results.log_meta("speed.track_no_init_time",
                     str(time.time() - full_track_time) + "s")

    results.show_precision()

    return
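distance_matrix_vector is assumed to return the pairwise Euclidean distances between the single anchor descriptor (1 x d) and each spatial position's descriptor (n x d); a NumPy stand-in consistent with how the result is reshaped above:

import numpy as np

def distance_matrix_vector_np(anchor, points):
    # anchor: (1, d); points: (n, d) -> distances: (n,)
    return np.sqrt(((points - anchor) ** 2).sum(axis=1))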