Example #1
0
def padding(nframes, x, y):
    """Stack every frame of ``x`` with its neighbouring frames into one row.

    Row ``i`` of the result is the concatenation of frames
    ``i - ba .. i + ba`` (``ba = (nframes - 1) // 2``), zero-filled where the
    window runs past the first or last frame.

    Two working copies of ``x`` are kept.  Every time a label equal to
    ``'!ENTER[2]'`` starts (the previous label is different, or it is the
    first frame) the active copy is switched and the ``ba`` frames before the
    current one are zeroed in the newly active copy.  When a label containing
    ``'!EXIT'`` is followed ``ba`` frames later by a label without it, frames
    ``i + ba .. i + 2 * ba`` of the active copy are zeroed.

    Parameters:
    - nframes: number of frames per output row (assumed odd -- TODO confirm).
    - x: 2D array of shape (n_frames, n_features); it is not modified.
    - y: numpy array of per-frame string labels (``y.shape[0]`` is read).

    Returns a float64 array of shape (n_frames, nframes * n_features).
    """
    # Floor division keeps ``ba`` an integer even under true division.
    ba = (nframes - 1) // 2  # frames of context before / after
    n_rows, n_feats = x.shape[0], x.shape[1]
    # Work on private copies so the caller's array is never zeroed in place.
    x1 = copy.deepcopy(x)
    x2 = copy.deepcopy(x)
    on_x2 = False
    x_f = np.zeros((n_rows, nframes * n_feats), dtype='float64')
    print('x_f shape: %s' % (x_f.shape,))
    for i in range(n_rows):
        # ``i == 0`` is checked explicitly so y[i - 1] never wraps to y[-1].
        entering = (y[i] == '!ENTER[2]'
                    and (i == 0 or y[i - 1] != '!ENTER[2]'))  # TODO general case
        if entering:
            on_x2 = not on_x2
            active = x2 if on_x2 else x1
            # Clamp at 0: a negative start would index from the end instead.
            active[max(0, i - ba):i, :] = 0.0
        if (i + ba < y.shape[0] and '!EXIT' in y[i]
                and '!EXIT' not in y[i + ba]):  # TODO general
            active = x2 if on_x2 else x1
            active[i + ba:i + 2 * ba + 1, :] = 0.0
        active = x2 if on_x2 else x1
        window = active[max(0, i - ba):i + ba + 1].flatten()
        missing_before = max(0, (ba - i) * n_feats)
        missing_after = max(0, ((i + ba + 1) - n_rows) * n_feats)
        x_f[i] = np.pad(window, (missing_before, missing_after),
                        'constant', constant_values=(0, 0))
    return x_f
Example #2
0
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, indexed as dout[n, f, out_row, out_col].
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, b, conv_param = cache
    stride = conv_param['stride']
    pad = conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    # Floor division keeps the output sizes integral so they can drive range().
    Hp = 1 + (H + 2 * pad - HH) // stride
    Wp = 1 + (W + 2 * pad - WW) // stride

    dx = np.zeros(x.shape)
    dw = np.zeros(w.shape)
    spatial_pad = ((0, 0), (pad, pad), (pad, pad))
    for i in range(N):
        data = np.pad(x[i], spatial_pad, 'constant')
        paded_dxi = np.pad(dx[i], spatial_pad, 'constant')
        for s in range(Hp):
            top = s * stride
            for p in range(Wp):
                left = p * stride
                data_fragment = data[:, top:top + HH, left:left + WW]
                # Outer product of the F upstream values with the input patch.
                dw += np.einsum('i,jkl->ijkl', dout[i, :, s, p], data_fragment)
                for f in range(F):
                    paded_dxi[:, top:top + HH, left:left + WW] += \
                        w[f] * dout[i, f, s, p]
        # Crop with explicit stops: ``pad:-pad`` would be empty for pad == 0.
        dx[i] = paded_dxi[:, pad:pad + H, pad:pad + W]
    # The bias gradient sums dout over every axis except the filter axis.
    db = np.einsum('ijkl->j', dout)
    return dx, dw, db
Example #3
0
def _2d_filter(mat, win2d, matsign, pad):
    """
    Filtering an image using a 2D window.

    Parameters
    ----------
    mat : 2D array of floats
    nrow : int
        Height of the window.
    ncol: int 
        Width of the window.
    sigma: tuple of 2 floats
        Sigmas of the window.
    pad : int
        Padding.

    Returns
    -------    
        Filtered image.
    """
    matpad = np.pad(mat, ((0, 0), (pad, pad)), mode='edge')
    matpad = np.pad(matpad, ((pad, pad), (0, 0)), mode='mean')
    (nrow, ncol) = matpad.shape
    matfilter = np.real(ifft2(fft2(matpad * matsign) * win2d) * matsign)
    return matfilter[pad:nrow - pad, pad:ncol - pad]
Example #4
0
  def eval(self, data, label, lens):
    """Evaluate the model on ``data`` in batches and return accuracy in percent.

    Full batches of ``self.batch_size`` rows are fed as they are.  Any
    remaining rows are zero-padded up to a full batch and only the outputs
    of the real rows are kept.  The argmax of ``self.logits`` for every
    row is collected and printed.

    Args:
      data: array whose first axis indexes examples (padded as 2D).
      label: array of labels aligned with ``data`` (padded as 2D).
      lens: optional 1D array of lengths aligned with ``data``; when it is
        None, ``self.lengths`` is not fed.

    Returns:
      ``100.0 * sum(correct) / count`` over all rows, or 0.0 when ``data``
      has no rows.
    """
    batch_size = self.batch_size
    num_examples = data.shape[0]
    # Build the argmax op once so it is not re-created for every batch.
    predicted_class = tf.argmax(self.logits, 1)
    predictions = []
    vals = []

    def run_batch(D, L, l, keep):
      # ``keep`` limits how many leading outputs are recorded (None = all).
      if l is not None:
        feed_dict = {self.dataset: D, self.labels: L, self.lengths: l}
      else:
        feed_dict = {self.dataset: D, self.labels: L}
      predictions.extend(self.sess.run(self.correct_prediction, feed_dict)[:keep])
      vals.extend(self.sess.run(predicted_class, feed_dict)[:keep])

    # Floor division: the batch count must be an integer for range().
    num_full = num_examples // batch_size
    for i in range(num_full):
      rows = slice(batch_size * i, batch_size * (i + 1))
      run_batch(data[rows], label[rows],
                lens[rows] if lens is not None else None, None)

    ## DO THE EXTRA
    # Computed from num_full rather than the loop variable, which does not
    # exist when there are fewer rows than one batch.
    last_chunk = batch_size * num_full
    remainder = num_examples - last_chunk
    if remainder > 0:
      gap = batch_size - remainder
      D = np.pad(data[last_chunk:], ((0, gap), (0, 0)), mode='constant', constant_values=0)
      L = np.pad(label[last_chunk:], ((0, gap), (0, 0)), mode='constant', constant_values=0)
      l = None
      if lens is not None:
        l = np.pad(lens[last_chunk:], (0, gap), mode='constant', constant_values=0)
      run_batch(D, L, l, remainder)

    print(vals)

    if not predictions:
      return 0.0
    return 100.0 * sum(predictions) / len(predictions)
Example #5
0
def padding(nframes, x, y):
    """ Dirty hacky padding for a minimum of nframes.

    Row ``i`` of the result is the concatenation of frames
    ``i - b_a .. i + b_a`` of the active working copy of ``x``
    (``b_a = (nframes - 1) // 2``), zero-filled where the window runs past
    the first or last frame.

    Two working copies of ``x`` are kept.  Each time a label equal to
    ``'!ENTER[2]'`` starts (previous label differs, or first frame) the
    active copy is switched and the ``b_a`` frames before the current one
    are zeroed in the newly active copy.  When a label containing
    ``'!EXIT'`` is followed ``b_a`` frames later by a label without it,
    frames ``i + b_a .. i + 2 * b_a`` of the active copy are zeroed.

    ``x`` itself is left untouched.  ``nframes`` is assumed odd (TODO
    confirm); ``y`` must be a numpy array of string labels.

    Returns a float32 array of shape (x.shape[0], nframes * x.shape[1]).
    """
    # Floor division keeps ``b_a`` an integer even under true division.
    b_a = (nframes - 1) // 2  # frames of context before / after
    n_rows, n_feats = x.shape[0], x.shape[1]
    # Private copies: the caller's array must not be zeroed as a side effect.
    x_1 = copy.deepcopy(x)
    x_2 = copy.deepcopy(x)
    on_x_2 = False
    x_f = zeros((n_rows, nframes * n_feats), dtype='float32')
    for i in range(n_rows):
        # ``i == 0`` is checked explicitly so y[i - 1] never wraps to y[-1].
        entering = (y[i] == '!ENTER[2]'
                    and (i == 0 or y[i - 1] != '!ENTER[2]'))  # TODO general case
        if entering:
            on_x_2 = not on_x_2
            active = x_2 if on_x_2 else x_1
            # Clamp at 0: a negative start would index from the end instead.
            active[max(0, i - b_a):i, :] = 0.0
        if (i + b_a < y.shape[0] and '!EXIT' in y[i]
                and '!EXIT' not in y[i + b_a]):
            # TODO general case
            active = x_2 if on_x_2 else x_1
            active[i + b_a:i + 2 * b_a + 1, :] = 0.0
        active = x_2 if on_x_2 else x_1
        window = active[max(0, i - b_a):i + b_a + 1].flatten()
        missing_before = max(0, (b_a - i) * n_feats)
        missing_after = max(0, ((i + b_a + 1) - n_rows) * n_feats)
        x_f[i] = pad(window, (missing_before, missing_after),
                     'constant', constant_values=(0, 0))
    return x_f
Example #6
0
def test_offset():
    """Check lmi.find_offset on an image shifted by 0..14 pixels.

    The reference image is shifted one pixel at a time along columns only,
    rows only, and both at once (zeros are shifted in, the far edge is
    dropped).  For each step the offset is queried in both argument orders
    and must equal the accumulated shift with opposite signs.
    """
    X, Y = np.mgrid[-5:5:0.05, -5:5:0.05]
    Z = np.sqrt(X**2 + Y**2) + np.sin(X**2 + Y**2)
    n_rows, n_cols = Z.shape

    # (row step, column step) applied after every comparison.
    for row_step, col_step in ((0, 1), (1, 0), (1, 1)):
        shifted = Z.copy()
        for i in range(15):
            dx, dy = lmi.find_offset(Z, shifted)
            dx2, dy2 = lmi.find_offset(shifted, Z)
            assert_array_equal([dx, dx2], [i * row_step, -i * row_step])
            assert_array_equal([dy, dy2], [i * col_step, -i * col_step])
            # Prepend zeros, then cut back to the original shape.
            grown = np.pad(shifted, ((row_step, 0), (col_step, 0)),
                           mode='constant')
            shifted = grown[:n_rows, :n_cols]
Example #7
0
  def SMeval(self, DWi, DU, Dlens, DWj, keep_predictions=False):
    """
    Runs eval on dev/test data with the option to return predictions or performance

    Data is fed in batches of ``self.batch_size``.  Rows left over after the
    last full batch are zero-padded to a full batch and only the outputs for
    the real rows are kept.

    Args:
      DWi: array fed to ``self.cur_world`` (padded as 4D).
      DU: array fed to ``self.inputs`` (padded as 2D).
      Dlens: 1D array fed to ``self.lengths``.
      DWj: array fed to ``self.next_world`` (padded as 2D).
      keep_predictions: when true, return the per-row argmax of
        ``self.logits``; otherwise return accuracy in percent.

    Returns:
      A list of predictions, or ``100.0 * sum(correct) / count`` (0.0 when
      there are no rows).
    """
    batch_size = self.batch_size
    total = len(DWi)
    # Pick the fetch once; the argmax op is only built when it is needed.
    if keep_predictions:
      fetch = tf.argmax(self.logits, 1)
    else:
      fetch = self.correct_prediction

    predictions = []
    # Floor division: the batch count must be an integer for range().
    num_full = total // batch_size
    for i in range(num_full):
      rows = slice(batch_size * i, batch_size * (i + 1))
      feed_dict = {self.cur_world: DWi[rows], self.next_world: DWj[rows],
                   self.inputs: DU[rows], self.lengths: Dlens[rows]}
      predictions.extend(self.sess.run(fetch, feed_dict))

    ## Grab the extras
    # Derived from num_full, not the loop variable, which is undefined when
    # there are fewer rows than one batch.
    last_chunk = batch_size * num_full
    remainder = total - last_chunk
    if remainder > 0:
      gap = batch_size - remainder
      wi = np.pad(DWi[last_chunk:], ((0, gap), (0, 0), (0, 0), (0, 0)), mode='constant', constant_values=0)
      wj = np.pad(DWj[last_chunk:], ((0, gap), (0, 0)), mode='constant', constant_values=0)
      U = np.pad(DU[last_chunk:], ((0, gap), (0, 0)), mode='constant', constant_values=0)
      lens = np.pad(Dlens[last_chunk:], (0, gap), mode='constant', constant_values=0)
      feed_dict = {self.cur_world: wi, self.next_world: wj, self.inputs: U, self.lengths: lens}
      predictions.extend(self.sess.run(fetch, feed_dict)[:remainder])

    if keep_predictions:
      return predictions
    if not predictions:
      return 0.0
    return 100.0 * sum(predictions) / len(predictions)
Example #8
0
def test_uninterpolated_nan_regions(boundary, normalize_kernel):
    """NaN interpolation of a contiguous NaN block (#8086).

    A block of NaNs is surrounded by ones and convolved with
    ``nan_treatment='interpolate'``:

    * block the same shape as the kernel: a warning is expected and NaNs
      remain in the result;
    * block one element smaller per axis: no NaNs may remain.
    """
    kernel = Gaussian2DKernel(1, 5, 5)
    border = kernel.shape[0]*2

    def surrounded_by_ones(nan_shape):
        # NaN block of the requested shape with a constant frame of ones.
        return np.pad(np.full(nan_shape, np.nan), pad_width=border,
                      mode='constant', constant_values=1)

    def interpolate(img):
        return convolve(img, kernel, boundary=boundary,
                        nan_treatment='interpolate',
                        normalize_kernel=normalize_kernel)

    # Case 1: kernel.shape == NaN_region.shape
    image = surrounded_by_ones(kernel.shape)
    expected_warning = ("nan_treatment='interpolate', however, NaN values detected "
                        "post convolution. A contiguous region of NaN values, larger "
                        "than the kernel size, are present in the input array. "
                        "Increase the kernel size to avoid this.")
    with pytest.warns(AstropyUserWarning, match=expected_warning):
        result = interpolate(image)
        assert np.isnan(result).any()

    # Case 2: kernel.shape > NaN_region.shape (one smaller along each axis)
    image = surrounded_by_ones((kernel.shape[0]-1, kernel.shape[1]-1))
    result = interpolate(image)
    assert not np.isnan(result).any()
Example #9
0
def tile_images_make_tiles(data, padsize=1, padval=0, width=None, highlights = None):
    """Pad a stack of images and arrange them into one tiled image.

    Parameters:
    - data: array whose first axis indexes images, followed by two spatial
      axes and optional trailing axes (the colour and highlight paths index
      four axes).
    - padsize: border added on both sides of each spatial axis.
    - padval: border value; a scalar or a length-1 sequence (grayscale) or a
      length-3 sequence (one value per channel).
    - width: desired number of tiles per row, passed to
      get_tiles_height_width as desired_width.
    - highlights: optional sequence with one entry per image; a non-None
      entry replaces that image's border with the given value.

    Returns ((height, width), tiled) where height/width are the tile counts
    and tiled is the assembled image with the trailing padsize rows/columns
    removed.
    """
    height,width = get_tiles_height_width(data.shape[0], desired_width = width)

    if highlights is not None:
        assert len(highlights) == data.shape[0]
    # Extra blank tiles fill the grid; every tile gets a two-sided border.
    padding = ((0, width*height - data.shape[0]), (padsize, padsize), (padsize, padsize)) + ((0, 0),) * (data.ndim - 3)

    # First pad with constant vals.  Scalars have no len(); wrap them.
    try:
        len(padval)
    except TypeError:
        padval = tuple((padval,))
    assert len(padval) in (1,3), 'padval should be grayscale (len 1) or color (len 3)'
    if len(padval) == 1:
        data = np.pad(data, padding, mode='constant', constant_values=(padval, padval))
    else:
        data = np.pad(data, padding, mode='constant', constant_values=(0, 0))
        for cc in (0,1,2):
            # Replace 0s with proper color in each channel.  The trailing
            # sides are guarded because ``-0:`` would select everything.
            data[:padding[0][0],  :, :, cc] = padval[cc]
            if padding[0][1] > 0:
                data[-padding[0][1]:, :, :, cc] = padval[cc]
            data[:, :padding[1][0],  :, cc] = padval[cc]
            if padding[1][1] > 0:
                data[:, -padding[1][1]:, :, cc] = padval[cc]
            data[:, :, :padding[2][0],  cc] = padval[cc]
            if padding[2][1] > 0:
                data[:, :, -padding[2][1]:, cc] = padval[cc]
    if highlights is not None:
        # Then highlight if necessary
        for ii,highlight in enumerate(highlights):
            if highlight is not None:
                data[ii,:padding[1][0],:,:] = highlight
                if padding[1][1] > 0:
                    data[ii,-padding[1][1]:,:,:] = highlight
                data[ii,:,:padding[2][0],:] = highlight
                if padding[2][1] > 0:
                    data[ii,:,-padding[2][1]:,:] = highlight

    # tile the filters into an image
    data = data.reshape((height, width) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((height * data.shape[1], width * data.shape[3]) + data.shape[4:])
    # remove excess padding; skipped for padsize == 0 because ``0:-0`` is
    # an empty slice and would discard the whole image.
    if padsize > 0:
        data = data[0:-padsize, 0:-padsize]

    return (height,width), data
Example #10
0
def pad(x, nf, ma=0):
    """ pad x for nf frames with margin ma.

    Each output row is the flattened window of ``nf`` consecutive frames of
    ``x`` (``ba = (nf - 1) // 2`` frames on each side of the centre).

    - ``ma == 0``: one row per frame; windows running past either end of
      ``x`` are zero-filled.
    - ``ma > 0``: ``x.shape[0] - 2 * ma`` rows are produced.  When
      ``ba <= ma`` row ``j`` is centred on frame ``j + ma`` and needs no
      zero fill.  When ``ba > ma`` the window is centred on frame ``j`` and
      zero-filled, exactly as in the margin-free case.
      NOTE(review): centring on ``j`` rather than ``j + ma`` in that last
      case looks inconsistent -- confirm against the callers.

    ``nf`` is assumed odd (TODO confirm).  The result has dtype
    ``theano.config.floatX`` and ``x.shape[1] * nf`` columns.
    """
    # Floor division keeps ``ba`` usable as a slice bound under true division.
    ba = (nf - 1) // 2  # before/after
    n_frames, n_feats = x.shape[0], x.shape[1]

    def zero_filled_window(j):
        # Window centred on frame j, zero-filled where it leaves x.
        return numpy.pad(
            x[max(0, j - ba) : j + ba + 1].flatten(),
            (max(0, (ba - j) * n_feats), max(0, ((j + ba + 1) - n_frames) * n_feats)),
            "constant",
            constant_values=(0, 0),
        )

    if ma:
        ret = numpy.zeros((n_frames - 2 * ma, n_feats * nf), dtype=theano.config.floatX)
        if ba <= ma:
            for j in range(ret.shape[0]):
                # Take exactly nf frames around the centre; the previous
                # 2 * ma + 1 wide window only fitted when ba == ma.
                centre = j + ma
                ret[j] = x[centre - ba : centre + ba + 1].flatten()
        else:
            for j in range(ret.shape[0]):
                ret[j] = zero_filled_window(j)
        return ret

    ret = numpy.zeros((n_frames, n_feats * nf), dtype=theano.config.floatX)
    for j in range(n_frames):
        ret[j] = zero_filled_window(j)
    return ret
Example #11
0
def paddingAnswers(answerSheet1, blankSheet1):
   """Convert both sheets to grayscale and pad their columns.

   Each sheet gets ``abs(rows - cols)`` extra columns in total, split
   between left and right with the odd column (if any) on the right.  The
   fill value for both sheets is the maximum of the grayscale blank sheet.

   Parameters:
   - answerSheet1: answer-sheet image with 1 or 3 bands.
   - blankSheet1: blank-sheet image with 1 or 3 bands.

   Returns (answerSheet, blankSheet), both grayscale and padded.

   Raises ValueError when a sheet has neither 1 nor 3 bands.
   """
   numRowsA, numColsA, numBandsA, dataTypeA = ipcv.dimensions(answerSheet1)
   numRowsB, numColsB, numBandsB, dataTypeB = ipcv.dimensions(blankSheet1)
   print('%s %s' % (numRowsB, numColsB))

   def toGray(sheet, numBands):
      # Three-band images are converted; single-band images pass through.
      if numBands == 3:
         return cv2.cvtColor(sheet, cv.CV_BGR2GRAY)
      if numBands == 1:
         return sheet
      raise ValueError('unsupported number of bands: %s' % (numBands,))

   def padColumns(sheet, numRows, numCols, fill):
      # Integer widths: numpy.pad does not accept fractional pad sizes.
      total = abs(numRows - numCols)
      left = total // 2
      right = total - left
      return numpy.pad(sheet, ((0, 0), (left, right)), 'constant',
                       constant_values=fill)

   answerSheet = toGray(answerSheet1, numBandsA)
   blankSheet = toGray(blankSheet1, numBandsB)

   maxCount = numpy.max(blankSheet)

   answerSheet = padColumns(answerSheet, numRowsA, numColsA, maxCount)
   # Sheet B is padded from its own dimensions (the even case used to test
   # the A dimensions by mistake).
   blankSheet = padColumns(blankSheet, numRowsB, numColsB, maxCount)

   return answerSheet, blankSheet
Example #12
0
def qea(im):
    """Amplitude, phase and per-axis frequency estimates of a 3D array.

    A complex array ``im + 1j * ss.hilbert(im, axis=2)`` is formed.  Its
    magnitude and angle are returned together with three estimates built
    from ``arccos((next + previous) / (2 * current))`` along axis 1, axis 0
    and axis 2, scaled by ``1 / (2 * pi)``.  Only the axis-0 estimate is
    signed.  Each estimate is reflect-padded by one sample at both ends of
    its axis so it has the same shape as ``im``.

    Returns the tuple ``(ia, ip, ifRow, ifCol, ifTime)``.
    """
    analytic = ss.hilbert(im, axis=2)
    analytic = im + 1j * analytic
    ia = np.abs(analytic)
    ip = np.angle(analytic)

    def acos_ratio(prev, mid, nxt):
        # Shared three-point expression used along every axis.
        return np.arccos((nxt + prev) / (2 * mid))

    # Axis 0 (the only signed estimate).
    prev0, mid0, next0 = analytic[:-2, :, :], analytic[1:-1, :, :], analytic[2:, :, :]
    sign0 = np.sign(np.real((prev0 - next0) / (2j * mid0)))
    ifCol = acos_ratio(prev0, mid0, next0)
    ifCol = (np.abs(ifCol) * sign0) / np.pi / 2
    ifCol = np.pad(ifCol, ((1, 1), (0, 0), (0, 0)), mode='reflect')

    # Axis 1.
    ifRow = acos_ratio(analytic[:, :-2, :], analytic[:, 1:-1, :], analytic[:, 2:, :])
    ifRow = (np.abs(ifRow)) / np.pi / 2
    ifRow = np.pad(ifRow, ((0, 0), (1, 1), (0, 0)), mode='reflect')

    # Axis 2.
    ifTime = acos_ratio(analytic[:, :, :-2], analytic[:, :, 1:-1], analytic[:, :, 2:])
    ifTime = (np.abs(ifTime)) / np.pi / 2
    ifTime = np.pad(ifTime, ((0, 0), (0, 0), (1, 1)), mode='reflect')

    return (ia, ip, ifRow, ifCol, ifTime)
Example #13
0
def deepflow2( im1=None, im2=None, match=None, options=""):
    """
    flow = deepflow2.deepflow2(image1, image2, match=None, options='')
    Compute the flow between two images, eventually using given matches.
    Images must be HxWx3 numpy arrays (convert to float32).
    Match is an optional numpy array argument (None by default, ie no input match), where each row starts by x1 y1 x2 y2.
    Options is an optional string argument ('' by default), to set the options. Type deepflow2() to see the list of available options.
    The function returns the optical flow as a HxWx2 numpy array.
    When either image is missing the usage text is shown and None is returned."""
    # Identity checks: ``None in (im1, im2)`` would compare the arrays
    # element-wise and fail with an ambiguous truth value.
    if im1 is None or im2 is None:
        usage_python()
        return
    # convert images
    assert im1.shape == im2.shape, "images must have the same shape"
    if im1.dtype != float32:
        im1 = im1.astype(float32)
    if im2.dtype != float32:
        im2 = im2.astype(float32)
    h, w, nchannels = im1.shape
    assert nchannels==3, "images must have 3 channels"
    # Row length rounded up to a multiple of 4; images are moved to
    # channel-first order and zero-padded on the right to that length.
    stride = 4*((w+3)//4)
    im1 = pad( rollaxis(im1,2), ((0,0),(0,0),(0, stride-w)), 'constant')
    im2 = pad( rollaxis(im2,2), ((0,0),(0,0),(0, stride-w)), 'constant')
    # allocate flow
    flowx = empty((h,stride), dtype=float32)
    flowy = empty((h,stride), dtype=float32)
    # compute flow
    if match is not None:
        assert match.shape[1]>=4
        match = ascontiguousarray(match[:,:4], dtype=float32)
    deepflow2_numpy( w, flowx, flowy, im1, im2, match, options)
    # Drop the alignment columns and stack x/y flow on a new last axis.
    return concatenate ( (flowx[:,:w,None], flowy[:,:w,None]), axis=2)
Example #14
0
File: lmi.py Project: sargas/dct
def _zero_pad_to_same_size(a, b):
    [ay, ax], [by, bx] = a.shape, b.shape
    if ax < bx or ay < by:
        a = np.pad(a, ( (by-ay,0),(bx-ax,0) ), mode='constant')
    elif ax > bx or ay > by:
        b = np.pad(b, ( (ay-by,0),(ax-bx,0) ), mode='constant')
    return a, b, [ax-bx, ay-by]
Example #15
0
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, indexed as dout[n, f, out_row, out_col].
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    # Setting up
    x, w, b, conv_param = cache
    dx = np.zeros_like(x)
    dw = np.zeros_like(w)
    db = np.zeros_like(b)

    stride = conv_param['stride']
    pad = conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    # Floor division keeps the output sizes integral so they can drive range().
    h_out = 1 + (H + 2 * pad - HH) // stride
    w_out = 1 + (W + 2 * pad - WW) // stride

    # Padding x and dx
    spatial_pad = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
    x_padded = np.pad(x, spatial_pad, mode='constant')
    dx_padded = np.pad(dx, spatial_pad, mode='constant')

    # For every image and filter, revisit each position of the forward pass.
    for image in range(N):
        for f in range(F):
            for height in range(h_out):
                top = height * stride
                for width in range(w_out):
                    left = width * stride
                    upstream = dout[image, f, height, width]

                    # The input window this output element was computed from.
                    conv_window = x_padded[image, :, top:top + HH, left:left + WW]

                    # And update the derivatives
                    db[f] += upstream
                    dw[f] += conv_window * upstream
                    dx_padded[image, :, top:top + HH, left:left + WW] += w[f] * upstream

    # Remove the padding once, after all contributions are accumulated.
    dx = dx_padded[:, :, pad:pad + H, pad:pad + W]
    return dx, dw, db
Example #16
0
  def _DataToInputs(self, spec, labels, weighted_labels, length, filename,
                    truncated_length):
    # This method re-implements a portion of the TensorFlow graph using numpy.
    # While typically it is frowned upon to test complicated code with other
    # code, there is no way around this for testing the pipeline end to end,
    # which requires an actual spec computation. Furthermore, much of the
    # complexity of the pipeline is due to the TensorFlow implementation,
    # so comparing it against simpler numpy code still provides effective
    # coverage.
    truncated_length = (min(truncated_length, length)
                        if truncated_length else length)

    # Pad or slice spec if differs from truncated_length.
    if len(spec) < truncated_length:
      pad_amt = truncated_length - len(spec)
      spec = np.pad(spec, [(0, pad_amt), (0, 0)], 'constant')
    else:
      spec = spec[0:truncated_length]

    # Pad or slice labels if differs from truncated_length.
    if len(labels) < truncated_length:
      pad_amt = truncated_length - len(labels)
      labels = np.pad(labels, [(0, pad_amt), (0, 0)], 'constant')
    else:
      labels = labels[0:truncated_length]

    inputs = [[spec, labels, truncated_length, filename]]

    return inputs
Example #17
0
def zero_padding():
    """Plot a sine and its spectrum, plain, zero-padded and windowed.

    Six panels are drawn in a 3x2 grid: the signal and its rfft magnitude,
    the zero-padded signal and its rfft magnitude, and the Hanning-windowed
    zero-padded signal and its rfft magnitude.  The figure is saved to
    'myfile.tikz' and then shown.
    """
    samples = 77
    rec_period = 4
    pad_count = 23
    # NOTE(review): with integer ``/`` semantics this is 19, with true
    # division it is 19.25 -- confirm which value is intended.
    sampling_rate = samples/rec_period
    time = np.linspace(0, rec_period, samples)
    sin = np.sin(time*3.75*np.pi)
    win = np.hanning(len(sin))

    padded_len = samples+pad_count
    padded_time = np.linspace(0, padded_len*rec_period/float(samples), padded_len)
    padded_sin = np.pad(sin, (0,pad_count), 'constant')
    padded_sin_win = np.pad(sin*win, (0, pad_count), 'constant')

    def spectrum(signal):
        # Frequency axis and scaled magnitude with the DC bin dropped.
        freqs = (np.fft.rfftfreq(len(signal))*sampling_rate)[1:]
        magnitude = (np.abs(np.fft.rfft(signal)))[1:]*2/samples
        return freqs, magnitude, "o"

    panels = (
        (321, (time, sin)),
        (322, spectrum(sin)),
        (323, (padded_time, padded_sin)),
        (324, spectrum(padded_sin)),
        (325, (padded_time, padded_sin_win)),
        (326, spectrum(padded_sin_win)),
    )
    for position, plot_args in panels:
        plt.subplot(position)
        plt.plot(*plot_args)
    matplotlib2tikz.save( 'myfile.tikz' )
    plt.show()
    def do(self,s, sigma, aArray, cArray):
        """Masked Gaussian smoothing of a 3D array.

        For every voxel where ``cArray`` is true, the output is the
        Gaussian-weighted mean of the ``aArray`` values inside the
        surrounding window, using only window voxels where ``cArray`` is
        true.  Voxels where ``cArray`` is false keep their input value.

        Parameters:
        - s: filter size per axis.  The window spans ``2 * (s // 2) + 1``
          voxels while the filter has ``s`` entries, so ``s`` must be odd.
        - sigma: width of the Gaussian.
        - aArray: 3D array of values.
        - cArray: 3D array, same shape, marking the voxels to use.

        Returns a new array with the shape of ``aArray``.
        """
        # center of filter (in both directions); floor division keeps it
        # usable as an index.
        c = s // 2

        # extend aArray and cArray
        aaArray = np.pad(aArray, ((c, c), (c, c), (c, c)),'edge')
        ccArray = np.pad(cArray, ((c, c), (c, c), (c, c)), 'constant', constant_values=(False))

        # gaussian filter centred on the middle of the s x s x s grid
        centre = float(c)
        x,y,z = np.mgrid[0:s,0:s,0:s]
        filt = np.exp(-(((centre-x)/sigma)**2 + ((centre-y)/sigma)**2 + ((centre-z)/sigma)**2)/2)
        print(filt)

        # Write into a copy: a plain slice is a view, so results would be
        # fed back into the windows of voxels processed later.
        dArray = aaArray.copy()
        n0, n1, n2 = aaArray.shape

        for i in range(c, n0 - c):
            for j in range(c, n1 - c):
                for k in range(c, n2 - c):
                    if not ccArray[i, j, k]:
                        continue
                    tempA = aaArray[(i - c):(i + c + 1), (j - c):(j + c + 1), (k - c):(k + c + 1)]
                    tempC = ccArray[(i - c):(i + c + 1), (j - c):(j + c + 1), (k - c):(k + c + 1)]
                    mask = np.where(tempC)

                    dArray[i, j, k] = np.sum(tempA[mask]*filt[mask])/np.sum(filt[mask])
        # Explicit stops: ``c:-c`` would be empty when c == 0.
        return dArray[c:n0 - c, c:n1 - c, c:n2 - c]
Example #19
0
 def _extend_data_to_include(self, url, url2):
     index_1, index_2 = self.get_indexes(url, url2)
     missing = index_1 < 0 or index_2 < 0
     if missing:
         none_index = self.get_index(None)
         if none_index >= len(self.known_urls) - 2:
             #extend per 500 for performance
             self.known_urls = self.known_urls + [None]*500
         if index_1 < 0:
             self.known_urls[none_index] = url
             none_index += 1
         if index_2 < 0:
             self.known_urls[none_index]  = url2
             none_index += 1
         size = len(self.known_urls)
         
         padding = size - self.click_matrix.shape[0]     
         self.click_matrix = \
             np.matrix(np.pad(self.click_matrix,
                              pad_width=([0,padding], 
                                         [0,padding]), 
                                         mode='constant'))
         
         padding = size - len(self.spend_time)
         self.spend_time = np.pad(self.spend_time, pad_width=(0, padding), 
                                  mode='constant')
Example #20
0
 def __init__(self,h5_path,image_paths,max_q=None,max_mc=None):
     """Load the datasets from ``h5_path`` into memory.

     Reads 'image_ids', 'questions', 'multiple_choice' and 'answers', and
     builds ``self.bounding_boxes`` by pairing 'img_list' with
     'bounding_boxes'.  The file handle is kept open as ``self.h5``.

     - max_q: when given, the last axis of ``questions`` is truncated or
       padded (with ``a_w2i['</s>']``) to this length.
     - max_mc: when given, the last axis of ``multiple_choice`` is
       truncated or padded the same way.
     - image_paths: stored unchanged as ``self.image_paths``.
     """
     self.h5 = h5py.File(h5_path,mode='r')

     def read(name):
         # ``dataset[()]`` reads the whole dataset; it replaces the
         # deprecated ``dataset.value`` attribute.
         return self.h5[name][()]

     self.image_ids = read('image_ids')
     self.questions = read('questions')
     self.multiple_choice = read('multiple_choice')
     self.answers = read('answers')
     self.bounding_boxes = dict((k,v) for (k,v) in zip(read('img_list'),
                                                       read('bounding_boxes')))
     self.N = len(self.image_ids)
     if max_q:
         if max_q<self.questions.shape[1]:
             self.questions = self.questions[:,:max_q]
         else:
             self.questions = np.pad(self.questions,
                                     ((0,0),(0,max_q-self.questions.shape[-1])),
                                     'constant',constant_values=a_w2i['</s>'])
     if max_mc:
         if max_mc<self.multiple_choice.shape[-1]:
             # Slice, not index: keep the first max_mc entries of the last
             # axis instead of selecting the single entry at max_mc.
             self.multiple_choice = self.multiple_choice[:,:,:max_mc]
         else:
             self.multiple_choice = np.pad(self.multiple_choice,
                                           ((0,0),(0,0),(0,max_mc-self.multiple_choice.shape[-1])),
                                           'constant',constant_values=a_w2i['</s>'])
     self.max_q = self.questions.shape[1]
     self.indexes = np.arange(self.N)
     self.image_paths = image_paths
Example #21
0
def get_input_matrices(batch_data):
    """Turn a batch into padded premise/hypothesis arrays plus masks.

    ``process(batch_data)`` supplies the premise sequences, hypothesis
    sequences and labels.  Each sequence is zero-padded along its first
    axis to the longest sequence of its kind in the batch, and a mask with
    ones over the original positions is built for each kind.

    Returns ``(X_prem, X_prem_mask, X_hypo, X_hypo_mask, y)`` as arrays.
    """
    X_prem, X_hypo, y = process(batch_data)
    batch_size = len(X_prem)

    # Longest premise / hypothesis sentence in the batch.
    longest_prem = max([len(entry) for entry in X_prem])
    longest_hypo = max([len(entry) for entry in X_hypo])

    # Masks are used in the Lasagne LSTM layer.
    X_prem_mask = np.zeros((batch_size, longest_prem))
    X_hypo_mask = np.zeros((batch_size, longest_hypo))

    def pad_and_mark(sentences, mask, longest):
        # Marks the real positions in ``mask`` and pads each entry in place.
        for row in range(batch_size):
            n_tokens = len(sentences[row])
            mask[row, :n_tokens] = 1
            sentences[row] = np.pad(sentences[row],
                                    [(0, longest - n_tokens), (0, 0)],
                                    'constant')

    pad_and_mark(X_prem, X_prem_mask, longest_prem)
    pad_and_mark(X_hypo, X_hypo_mask, longest_hypo)

    return (np.asarray(X_prem), X_prem_mask,
            np.asarray(X_hypo), X_hypo_mask,
            np.asarray(y))
def saveSlidingWindows(args):
    """Cut an image into sliding-window crops and save each crop as a PNG.

    ``args`` is a single tuple
    ``(im_path, filter_size, step_size, out_file_pre, idx)``:
    - im_path: image file to read.
    - filter_size: (rows, cols) of each crop.
    - step_size: step between crop origins on both axes.
    - out_file_pre: output prefix; crops are written to
      ``<out_file_pre>_<row index>_<col index>.png``.
    - idx: value printed before processing.

    The image is edge-padded with the amounts given by ``getPadTuple`` for
    each spatial axis before it is cropped.

    Returns the list of written file names, in row-major order.
    """
    # Unpacked here because tuple parameters in the signature are not valid
    # syntax on Python 3; callers still pass one tuple.
    (im_path, filter_size, step_size, out_file_pre, idx) = args
    print(idx)
    # NOTE(review): scipy.misc.imread/imsave are absent from recent SciPy
    # releases -- confirm the SciPy version this runs against.
    im = scipy.misc.imread(im_path)

    pad_r = getPadTuple(im.shape[0], filter_size[0], step_size)
    pad_c = getPadTuple(im.shape[1], filter_size[1], step_size)
    if len(im.shape) > 2:
        im = np.pad(im, (pad_r, pad_c, (0, 0)), 'edge')
    else:
        im = np.pad(im, (pad_r, pad_c), 'edge')

    out_files = []
    for idx_r, start_r in enumerate(range(0, im.shape[0], step_size)):
        for idx_c, start_c in enumerate(range(0, im.shape[1], step_size)):
            end_r = start_r + filter_size[0]
            end_c = start_c + filter_size[1]
            crop_curr = im[start_r:end_r, start_c:end_c]
            out_file_curr = out_file_pre+'_'+str(idx_r)+'_'+str(idx_c)+'.png'
            scipy.misc.imsave(out_file_curr, crop_curr)
            out_files.append(out_file_curr)

    return out_files
Example #23
0
 def local_pad(x):  # TODO replace with pad global function
     """Stack each frame of ``x`` with its neighbours into rows of nf frames.

     ``nf`` and ``self._margin`` come from the enclosing scope.  With
     ``nf <= 1`` the input is returned unchanged.  Otherwise every output
     row is the flattened window of ``nf`` frames
     (``ba = (nf - 1) // 2`` on each side of the centre):

     - no margin: one row per frame, zero-filled where the window leaves
       ``x``;
     - margin ``ma``: ``x.shape[0] - 2 * ma`` rows.  When ``ba <= ma`` row
       ``j`` is centred on frame ``j + ma``; when ``ba > ma`` it is centred
       on frame ``j`` and zero-filled.
       NOTE(review): centring on ``j`` in that last case looks inconsistent
       with the margin handling -- confirm.

     The result has dtype ``theano.config.floatX``.
     """
     if nf <= 1:
         return x
     # Floor division keeps ``ba`` usable as a slice bound.
     ba = (nf - 1) // 2  # before/after
     n_frames, n_feats = x.shape[0], x.shape[1]

     def zero_filled_window(j):
         # Window centred on frame j, zero-filled where it leaves x.
         return numpy.pad(x[max(0, j - ba):j + ba + 1].flatten(),
                 (max(0, (ba - j) * n_feats),
                     max(0, ((j + ba + 1) - n_frames) * n_feats)),
                 'constant', constant_values=(0, 0))

     if self._margin:
         ma = self._margin
         # Diagnostics for inputs too small to produce any output.
         if n_frames - 2*ma <= 0:
             print("shape[0]: %s" % (n_frames,))
             print("ma: %s" % (ma,))
         if n_feats * nf <= 0:
             print("shape[1]: %s" % (n_feats,))
             print("nf: %s" % (nf,))
         ret = numpy.zeros((n_frames - 2 * ma, n_feats * nf),
                 dtype=theano.config.floatX)
         if ba <= ma:
             for j in range(ret.shape[0]):
                 # Exactly nf frames around the centre; the previous
                 # 2*ma + 1 wide window only fitted when ba == ma.
                 centre = j + ma
                 ret[j] = x[centre - ba:centre + ba + 1].flatten()
         else:
             for j in range(ret.shape[0]):
                 ret[j] = zero_filled_window(j)
         return ret

     ret = numpy.zeros((n_frames, n_feats * nf),
             dtype=theano.config.floatX)
     for j in range(n_frames):
         ret[j] = zero_filled_window(j)
     return ret
Example #24
0
    def treatArray(data):
        """Crop or pad ``data`` to the patch size according to border_mode.

        ``border_mode``, ``n_dim`` and ``ps`` come from the enclosing scope.

        - 'keep': return ``data`` unchanged.
        - 'crop': take a centred region of size ``ps``.
        - 'mirror': pad symmetrically up to ``ps``.
        - '0-pad': pad with zeros up to ``ps``.
        Any other mode returns ``data`` unchanged.

        For ``n_dim == 3`` the spatial axes are 0, 2 and 3 (axis 1 is
        skipped as the channel axis); otherwise they are axes 2 and 3.
        """
        if border_mode == 'keep':
            return data

        if n_dim == 3:
            sh = (data.shape[0], ) + data.shape[2:]  # exclude channel (z,x,y)
        else:
            sh = data.shape[2:]  # (x,y)

        if border_mode == 'crop':
            # Lists rather than map(): the values are indexed below, which
            # a lazy map object does not support.
            excess = [int((have - want) // 2) for have, want in zip(sh, ps)]
            if n_dim == 3:
                data = data[excess[0]:excess[0] + ps[0], :, excess[1]:excess[1] + ps[1], excess[2]:excess[2] + ps[2]]
            elif n_dim == 2:
                data = data[:, :, excess[0]:excess[0] + ps[0], excess[1]: excess[1] + ps[1]]

        else:
            # An odd difference puts the extra element on the leading side.
            excess_l = [int(np.ceil(float(want - have) / 2)) for want, have in zip(ps, sh)]
            excess_r = [int(np.floor(float(want - have) / 2)) for want, have in zip(ps, sh)]
            if n_dim == 3:
                pad_with = [(excess_l[0], excess_r[0]), (0, 0), (excess_l[1], excess_r[1]), (excess_l[2], excess_r[2])]
            else:
                pad_with = [(0, 0), (0, 0), (excess_l[0], excess_r[0]), (excess_l[1], excess_r[1])]

            if border_mode == 'mirror':
                data = np.pad(data, pad_with, mode='symmetric')
            elif border_mode == '0-pad':
                data = np.pad(data, pad_with, mode='constant', constant_values=0)

        return data
Example #25
0
 def test_find_center_vo_with_downsampling(self):
     """find_center_vo on 'sinogram.npy' must return about 45.28."""
     sinogram = read_file('sinogram.npy')
     # NOTE(review): np.pad returns a new array and the result is discarded
     # here, so find_center_vo receives the unpadded sinogram.  Assigning
     # the result would change the input and presumably the expected
     # centre -- confirm the intent before changing it.
     pad_widths = ((1000, 1000), (0, 0), (1000, 1000))
     np.pad(sinogram, pad_widths, mode="constant", constant_values=0)
     center = find_center_vo(sinogram)
     assert_allclose(center, 45.28, rtol=0.015)
Example #26
0
def calcgrad(i):
	"""Return a 6-bit code per pixel built from four neighbour differences.

	For every pixel the differences to its left, lower-left, lower and
	lower-right neighbours are taken (neighbours outside the image count as
	0). Each of the six pairs of differences contributes one bit, set when the
	first difference of the pair is >= the second one.
	"""
	rows,cols=i.shape
	# One zero column on each side and one zero row below: every neighbour
	# then becomes a plain window of the same padded frame.
	framed=np.pad(i,((0,1),(1,1)),'constant')
	left=framed[0:rows,0:cols]
	lower_left=framed[1:rows+1,0:cols]
	lower=framed[1:rows+1,1:cols+1]
	lower_right=framed[1:rows+1,2:cols+2]
	diffs=[i-left,i-lower_left,i-lower,i-lower_right]
	# (index of first difference, index of second difference, bit weight)
	comparisons=[(0,1,32),(0,2,16),(0,3,8),(1,2,4),(1,3,2),(2,3,1)]
	ldgp=0
	for a,b,weight in comparisons:
		ldgp=ldgp+weight*np.array(diffs[a] >= diffs[b], dtype=int)
	return ldgp
Example #27
0
    def __init__(self, G_list, max_num_nodes, features='id'):
        """Collect adjacency matrices, node counts and node features per graph.

        Args:
            G_list: iterable of graphs; each must work with
                ``nx.to_numpy_matrix``, ``number_of_nodes()`` and, for
                ``features='struct'``, ``nx.clustering``.
            max_num_nodes: length the per-node feature columns are padded to.
            features: ``'id'`` stores an identity matrix of size
                ``max_num_nodes``; ``'deg'`` stores the zero-padded degree
                column; ``'struct'`` stores zero-padded degree and clustering
                columns side by side. Any other value stores no feature for
                the graph.
        """
        self.max_num_nodes = max_num_nodes
        self.adj_all = []
        self.len_all = []
        self.feature_all = []

        for G in G_list:
            # NOTE(review): to_numpy_matrix is unavailable in recent networkx
            # releases -- confirm the pinned version before upgrading.
            adj = nx.to_numpy_matrix(G)
            num_nodes = G.number_of_nodes()
            pad_width = [0, max_num_nodes - num_nodes]
            # the diagonal entries are 1 since they denote node probability
            self.adj_all.append(np.asarray(adj) + np.identity(num_nodes))
            self.len_all.append(num_nodes)
            if features == 'id':
                self.feature_all.append(np.identity(max_num_nodes))
            elif features == 'deg':
                degs = np.sum(np.array(adj), 1)
                # The third positional argument of np.pad is the mode; the
                # zero fill is requested with 'constant', as in 'struct'.
                degs = np.expand_dims(np.pad(degs, pad_width, 'constant'),
                                      axis=1)
                self.feature_all.append(degs)
            elif features == 'struct':
                degs = np.sum(np.array(adj), 1)
                degs = np.expand_dims(np.pad(degs, pad_width, 'constant'),
                                      axis=1)
                clusterings = np.array(list(nx.clustering(G).values()))
                clusterings = np.expand_dims(
                    np.pad(clusterings, pad_width, 'constant'), axis=1)
                self.feature_all.append(np.hstack([degs, clusterings]))
    def _fixOddKernel(kernel):
        """Make both dimensions of a 2-D kernel even so it suits the FFT.

        Parameters
        ----------
        kernel : `numpy.array`
            the kernel to check

        Returns
        -------
        out : `numpy.array`
            ``kernel`` itself when both dimensions are already even. Otherwise
            a zero-padded copy, rescaled so that its mean equals the mean of
            ``kernel``.
        """
        padded = kernel
        for axis in (0, 1):
            if padded.shape[axis] % 2 == 1:
                # Note this works best for the FFT if we left-pad
                widths = [(0, 0), (0, 0)]
                widths[axis] = (1, 0)
                padded = np.pad(padded, widths, mode='constant')
        if padded is kernel:
            return kernel
        # need to re-scale to same mean for FFT
        padded *= (np.mean(kernel) / np.mean(padded))
        return padded
Example #29
0
def cross_correlation(x, y, maxlag):
    """
    Cross correlation restricted to lags ``-maxlag .. maxlag``.

    `x` and `y` must be one-dimensional and of equal length; anything
    accepted by ``numpy.asarray`` may be passed.

    The result equals
        numpy.correlate(x, y, mode='full')[len(x)-maxlag-1:len(x)+maxlag]

    and therefore has length 2*maxlag + 1.

    Raises ValueError when `x` or `y` is not one-dimensional.

    Author: http://stackoverflow.com/questions/30677241
            Warren Weckesser
    """
    from numpy.lib.stride_tricks import as_strided

    checked = []
    for label, values in (('x', x), ('y', y)):
        values = np.asarray(values)
        if values.ndim != 1:
            raise ValueError('%s must be one-dimensional.' % label)
        checked.append(values)
    x, y = checked

    span = 2 * maxlag
    y_padded = np.pad(y.conj(), span, mode='constant')
    step = y_padded.strides[0]
    # Each row of this view is the padded y shifted by one more sample; the
    # negative row stride walks backwards from offset ``span`` without copying.
    shifted = as_strided(y_padded[span:],
                         shape=(span + 1, len(y) + span),
                         strides=(-step, step))
    x_padded = np.pad(x, maxlag, mode='constant')
    return shifted.dot(x_padded)
 def __init__(self,h5_path,image_paths,max_q=None,max_mc=None):
     """Load the arrays of an HDF5 file and fix the token-axis lengths.

     Args:
         h5_path: path of the HDF5 file; it is opened read-only and must hold
             the datasets 'image_ids', 'questions', 'multiple_choices' and
             'ground_truth'.
         image_paths: stored unchanged on the instance.
         max_q: if truthy, the second axis of ``questions`` is truncated to
             this length, or right-padded up to it with ``w2i['</s>']``.
         max_mc: if truthy, the last axis of ``multiple_choice`` is truncated
             or right-padded in the same way.
     """
     self.h5 = h5py.File(h5_path,mode='r')
     # ``dataset[()]`` reads the whole dataset into memory; it replaces the
     # ``.value`` accessor, which newer h5py releases no longer provide.
     self.image_ids = self.h5['image_ids'][()]
     self.questions = self.h5['questions'][()]
     self.multiple_choice = self.h5['multiple_choices'][()]
     self.answers = self.h5['ground_truth'][()]
     self.N = len(self.image_ids)
     if max_q:
         if max_q<self.questions.shape[1]:
             self.questions = self.questions[:,:max_q]
         else:
             self.questions = np.pad(self.questions,
                                     ((0,0),(0,max_q-self.questions.shape[-1])),
                                     'constant',constant_values=w2i['</s>'])
     if max_mc:
         if max_mc<self.multiple_choice.shape[-1]:
             # Slice (``:max_mc``) to truncate the last axis; a plain index
             # would pick one position and drop the axis altogether.
             self.multiple_choice = self.multiple_choice[:,:,:max_mc]
         else:
             self.multiple_choice = np.pad(self.multiple_choice,
                                           ((0,0),(0,0),(0,max_mc-self.multiple_choice.shape[-1])),
                                           'constant',constant_values=w2i['</s>'])
     # NOTE(review): max_mc is taken from axis 1 although the ``max_mc``
     # argument above limits the last axis -- confirm which one is intended.
     self.max_mc = self.multiple_choice.shape[1]
     self.max_q = self.questions.shape[1]
     self.indexes = np.arange(self.N)
     self.image_paths = image_paths
Example #31
0
             # NOTE(review): this excerpt starts inside nested loops whose
             # headers are not visible here.
             # Tokens of the form 'bk_<n>' are resolved through get_bank_size;
             # the first token that is not a 'bk' token ends the scan.
             if st.split('_')[0] == 'bk':
                 bk = get_bank_size(int(st.split('_')[1]))
                 st_id_list.append(bk[0])        
                 st_list.append(bk)
             else:
                 break        
     # Records that produced exactly one entry are not kept.
     if len(st_list) == 1: 
         continue
     real_data_id.append(st_id_list)
     real_data.append(st_list)
     start_id_list.append(st_id_list[0])
     end_id_list.append(st_id_list[-1])
 
 # Keep the untruncated id sequences (shallow copy of the outer list).
 real_data_id0 = real_data_id.copy()
 # Cut every sequence to at most g_sequence_len ids ...
 real_data_id = [x[:g_sequence_len] for x in real_data_id] 
 # ... and right-pad the shorter ones up to g_sequence_len (np.pad's default fill).
 real_data_id1 = [np.pad(x, (0,g_sequence_len - len(x))) for x in real_data_id]
 endtime = time.time(); dtime = endtime - starttime
 # '%.8s' prints at most the first 8 characters of the elapsed time.
 print("\nTime for loading real world data:%.8s s" % dtime)
 
 GENERATED_NUM = len(real_data_id1)
 print('\nGENERATED_NUM,real_data_id1', GENERATED_NUM)
     
 VOCAB_SIZE = len(x_info_ids)+1+10 # padding
 print('\nVOCAB_SIZE:',VOCAB_SIZE)
 print('real_vocab_size: ', len(x_info_ids))
 
 starttime = time.time()
 # Collect the positions of all ids in x_ids that are missing from x_info_ids.
 x_index = []
 for i in range(len(x_ids)):
     if x_ids[i] not in x_info_ids:
         x_index.append(i)
Example #32
0
def _pad(seq, max_len, constant_values=0):
    """Right-pad the 1-D ``seq`` with ``constant_values`` up to ``max_len`` items."""
    trailing = max_len - len(seq)
    return np.pad(seq, (0, trailing), mode='constant',
                  constant_values=constant_values)
# Command line: <script> <model_path> <img_in> <img_out>
_, model_path, img_in, img_out = sys.argv[0:4]

# For lib_maxout_theano_batch we can control batch size
# batch_size = 1024
# if len(sys.argv) > 4:
#     batch_size = int(sys.argv[4])
# network = DeepNetwork(model_path, batch_size=batch_size)

network = DeepNetwork(model_path)

input_image = normalize_image_float(np.array(Image.open(img_in)))
nx, ny = input_image.shape

# Pad here with mirrored borders and tell the network not to pad again.
pad_by = network.pad_by
pad_image = np.pad(input_image, ((pad_by, pad_by), (pad_by, pad_by)), 'symmetric')

start_time = time.time()

output = network.apply_net(pad_image, perform_pad=False)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Complete in {0:1.4f} seconds'.format(time.time() - start_time))

# The output is scaled by 255 and stored as an 8-bit image.
im = Image.fromarray(np.uint8(output * 255))
im.save(img_out)

print("Image saved.")

import h5py
# Mode 'a' (read/write, create if missing) is spelled out because the h5py
# default mode differs between releases; the ``with`` block closes the file so
# the dataset is flushed to disk.
with h5py.File(img_out.replace('.tif', '') + '.h5', 'a') as f:
    f['/probabilities'] = output
    def __getitem__(self, idx):
        """Build one batch of line images that all come from a single author.

        ``self.lineIndex[idx]`` supplies ``(author, line_indices)``. Every line
        image is read as grayscale, cropped or padded vertically by its stored
        ``pad_above`` / ``pad_below`` amounts, padded by 10 pixels of value 255
        on the left and right, brought to ``self.img_height`` and mapped to
        ``1.0 - img / 128.0``. The images are then written into one array that
        is as wide as the widest image and pre-filled with
        ``PADDING_CONSTANT``.

        Returns:
            A dict with the keys "image", "mask", "top_and_bottom",
            "center_line", "label", "style", "label_lengths", "gt",
            "spaced_label", "name" and "author", plus "fg_mask" when
            ``self.fg_masks_dir`` is set. ``None`` is returned as soon as an
            image cannot be read or a ground-truth string is empty.
        """
        inst = self.lineIndex[idx]
        author = inst[0]
        lines = inst[1]
        batch = []
        for line in lines:
            # Out-of-range line numbers are wrapped back into the author's
            # lines (the offset 37 is presumably arbitrary -- TODO confirm).
            if line >= len(self.authors[author]):
                line = (line + 37) % len(self.authors[author])
            img_path, gt, pad_above, pad_below = self.authors[author][line]
            img = cv2.imread(img_path, 0)  #read as grayscale
            if img is None:
                return None

            # A negative pad amount means "crop that many rows" instead.
            if pad_above < 0:
                img = img[-pad_above:, :]
                pad_above = 0
            if pad_below < 0:
                img = img[:pad_below, :]
                pad_below = 0
            #we also pad a bit on the sides
            img = np.pad(img, ((pad_above, pad_below), (10, 10)),
                         'constant',
                         constant_values=255)

            if img.shape[0] != self.img_height:
                if img.shape[0] < self.img_height and not self.warning:
                    self.warning = True
                    print("WARNING: upsampling image to fit size")
                percent = float(self.img_height) / img.shape[0]
                # Limit the scale so the width never exceeds max_width ...
                if img.shape[1] * percent > self.max_width:
                    percent = self.max_width / img.shape[1]
                img = cv2.resize(img, (0, 0),
                                 fx=percent,
                                 fy=percent,
                                 interpolation=cv2.INTER_CUBIC)
                # ... and make up the missing height with rows of 255.
                if img.shape[0] < self.img_height:
                    diff = self.img_height - img.shape[0]
                    img = np.pad(img,
                                 ((diff // 2, diff // 2 + diff % 2), (0, 0)),
                                 'constant',
                                 constant_values=255)

            if len(img.shape) == 2:
                img = img[..., None]
            if self.fg_masks_dir is not None:
                fg_path = os.path.join(self.fg_masks_dir,
                                       '{}_{}.png'.format(author, line))
                fg_mask = cv2.imread(fg_path, 0)
                fg_mask = fg_mask / 255
                if fg_mask.shape != img[:, :, 0].shape:
                    print(
                        'Error, fg_mask ({}, {}) not the same size as image ({})'
                        .format(fg_path, fg_mask.shape, img[:, :, 0].shape))
                    # Fall back to a mask computed from the image itself:
                    # Otsu threshold, inverted, then dilated.
                    th, fg_mask = cv2.threshold(
                        img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                    fg_mask = 255 - fg_mask
                    ele = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
                    fg_mask = cv2.dilate(fg_mask, ele)
                    fg_mask = fg_mask / 255

            if self.augmentation is not None:
                #img = augmentation.apply_random_color_rotation(img)
                img = augmentation.apply_tensmeyer_brightness(img)
                img = grid_distortion.warp_image(img)
                if len(img.shape) == 2:
                    img = img[..., None]

            img = img.astype(np.float32)
            img = 1.0 - img / 128.0

            if len(gt) == 0:
                return None
            gt_label = string_utils.str2label_single(gt, self.char_to_idx)

            if self.styles:
                # NOTE(review): no local ``id`` exists in this method, so the
                # builtin ``id`` function is used as the dictionary key here.
                # Confirm which identifier was meant.
                style_i = self.npr.choice(len(self.styles[author][id]))
                style = self.styles[author][id][style_i]
            else:
                style = None
            name = img_path[img_path.rfind('/') + 1:img_path.rfind('.')]
            spaced_label = None if self.spaced_by_name is None else self.spaced_by_name[
                img_path]
            if spaced_label is not None:
                assert (spaced_label.shape[1] == 1)
            toAppend = {
                "image": img,
                "gt": gt,
                "style": style,
                "gt_label": gt_label,
                "spaced_label": spaced_label,
                "name": name,
                "center": self.center,
                "author": author
            }
            if self.fg_masks_dir is not None:
                toAppend['fg_mask'] = fg_mask
            batch.append(toAppend)
        #These all should be the same size or error
        assert len(set([b['image'].shape[0] for b in batch])) == 1
        assert len(set([b['image'].shape[2] for b in batch])) == 1

        dim0 = batch[0]['image'].shape[0]
        dim1 = max([b['image'].shape[1] for b in batch])
        dim2 = batch[0]['image'].shape[2]

        all_labels = []
        label_lengths = []
        if self.spaced_by_name is not None:
            spaced_labels = []
        else:
            spaced_labels = None
        max_spaced_len = 0

        input_batch = np.full((len(batch), dim0, dim1, dim2),
                              PADDING_CONSTANT).astype(np.float32)
        if self.fg_masks_dir is not None:
            fg_masks = np.full((len(batch), dim0, dim1, 1),
                               0).astype(np.float32)
        for i in range(len(batch)):
            b_img = batch[i]['image']
            # Horizontal offset of the image: half of the free width when the
            # batch is centred, otherwise the image starts at column 0.
            toPad = (dim1 - b_img.shape[1])
            if 'center' in batch[0] and batch[0]['center']:
                toPad //= 2
            else:
                toPad = 0
            input_batch[i, :, toPad:toPad + b_img.shape[1], :] = b_img
            if self.fg_masks_dir is not None:
                fg_masks[i, :, toPad:toPad + b_img.shape[1],
                         0] = batch[i]['fg_mask']

            l = batch[i]['gt_label']
            all_labels.append(l)
            label_lengths.append(len(l))

            if spaced_labels is not None:
                sl = batch[i]['spaced_label']
                spaced_labels.append(sl)
                max_spaced_len = max(max_spaced_len, sl.shape[0])

        label_lengths = torch.IntTensor(label_lengths)
        max_len = label_lengths.max()
        # Zero-pad every label to the longest one, then stack them as columns
        # (axis 1 indexes the batch).
        all_labels = [
            np.pad(l, ((0, max_len - l.shape[0]), ), 'constant')
            for l in all_labels
        ]
        all_labels = np.stack(all_labels, axis=1)
        if self.spaced_by_name is not None:
            spaced_labels = [
                np.pad(l, ((0, max_spaced_len - l.shape[0]), (0, 0)),
                       'constant') for l in spaced_labels
            ]
            spaced_labels = np.concatenate(spaced_labels, axis=1)
            spaced_labels = torch.from_numpy(spaced_labels)
            assert (spaced_labels.size(1) == len(batch))

        # Move the channel axis in front of the two image axes.
        images = input_batch.transpose([0, 3, 1, 2])
        images = torch.from_numpy(images)
        labels = torch.from_numpy(all_labels.astype(np.int32))
        if self.fg_masks_dir is not None:
            fg_masks = fg_masks.transpose([0, 3, 1, 2])
            fg_masks = torch.from_numpy(fg_masks)

        if batch[0]['style'] is not None:
            styles = np.stack([b['style'] for b in batch], axis=0)
            styles = torch.from_numpy(styles).float()
        else:
            styles = None
        mask, top_and_bottom, center_line = makeMask(images, self.mask_post,
                                                     self.mask_random)
        toRet = {
            "image": images,
            "mask": mask,
            "top_and_bottom": top_and_bottom,
            "center_line": center_line,
            "label": labels,
            "style": styles,
            "label_lengths": label_lengths,
            "gt": [b['gt'] for b in batch],
            "spaced_label": spaced_labels,
            "name": [b['name'] for b in batch],
            "author": [b['author'] for b in batch],
        }
        if self.fg_masks_dir is not None:
            toRet['fg_mask'] = fg_masks
        return toRet
    def __init__(self, dirPath, split, config):
        """Index the line images by author and prepare per-author batches.

        Args:
            dirPath: dataset root holding the ground-truth text files and the
                image sub-directory.
            split: split name; overridden by ``config['split']`` when present.
                'test' selects the sub-directory 'testdataset_ICDAR', anything
                else 'training_WR'.
            config: dict of settings. Required keys: 'img_height',
                'a_batch_size', 'char_file'. Optional keys: 'split',
                'max_width' (default 1300), 'skip_pad' (default False),
                'short' (default False), 'fg_masks_dir', 'augmentation',
                'overfit', 'style_loc', 'spaced_loc', 'mask_post'
                (default []), 'mask_random' (default False).

        Side effects: writes 'groundtruth_<split>2009_pageNorm.txt' into
        ``dirPath`` when it does not exist yet and, when 'fg_masks_dir' is
        configured, writes every missing foreground mask image.
        """
        if 'split' in config:
            split = config['split']
        if split == 'test':
            subdir = 'testdataset_ICDAR'
        else:
            subdir = 'training_WR'

        self.img_height = config['img_height']
        self.batch_size = config['a_batch_size']
        self.max_width = config['max_width'] if 'max_width' in config else 1300
        skip_pad = config['skip_pad'] if 'skip_pad' in config else False

        words_file = os.path.join(
            dirPath, 'groundtruth_{}2009_pageNorm.txt'.format(split))
        if not os.path.exists(words_file):
            #create modified GT file with appropriate padding anotation
            #the padding is so images by the same author have the same height (after normalization)
            authors = defaultdict(list)
            with open(
                    os.path.join(dirPath,
                                 'groundtruth_{}2009.txt'.format(split))) as f:
                word_list = f.readlines()
            for line in word_list:
                # Raw string: the pattern text is unchanged, but '\/' is no
                # longer an invalid escape sequence in the literal.
                m = re.match(r'(lot_.+\/([^/]+)\/[^/]+.tiff) (.+)',
                             line.strip())
                author = m[2]
                gt = m[3]
                authors[author].append((m[1], gt))
            new_lines = []
            for author, lines in authors.items():
                above = []
                below = []
                n_lines = []  #to remove non-existent images
                #we measure the number of pixels above and below the centerline in each image
                for i, (path, gt) in enumerate(lines):
                    img_path = os.path.join(dirPath, subdir, path)
                    img = cv2.imread(img_path, 0)  #read as grayscale
                    if img is None:
                        continue
                    img = 255 - img
                    img = cv2.blur(
                        img, (21, 21),
                        borderType=cv2.BORDER_CONSTANT)  #borderValue=0)
                    # Row sums of the blurred, inverted image.
                    img = img.sum(axis=1)
                    centerline = np.argmax(
                        img)  #assume center is where most ink is (after blur)
                    assert (centerline > img.shape[0] * 0.1)
                    assert (centerline < img.shape[0] * 0.9)
                    above.append(centerline)
                    below.append(img.shape[0] - centerline)
                    n_lines.append((path, gt))
                above_mean = np.mean(above)
                above_std = np.std(above)
                above_max = np.max(above)
                below_mean = np.mean(below)
                below_std = np.std(below)
                below_max = np.max(below)

                #we want to pad/crop to include most ascenders and descenders, but dont need all
                goal_above = int(min(above_max, above_mean + above_std * 2))
                goal_below = int(min(below_max, below_mean + below_std * 2))
                # Positive values mean "pad", negative values mean "crop".
                for i, (path, gt) in enumerate(n_lines):
                    pad_above = goal_above - above[i]
                    pad_below = goal_below - below[i]
                    new_lines.append((path, gt, pad_above, pad_below))
            with open(words_file, 'w') as f:
                for path, gt, above, below in new_lines:
                    f.write('{} {} {} {}\n'.format(path, gt, above, below))

        with open(words_file) as f:
            word_list = f.readlines()
        self.authors = defaultdict(list)
        self.lineIndex = []
        self.max_char_len = 0
        for line in word_list:
            m = re.match(
                r'(lot_.+\/([^/]+)\/[^/]+.tiff) ([^ ]+) (-?\d+) (-?\d+)',
                line.strip())
            path = os.path.join(dirPath, subdir, m[1])
            author = m[2]
            gt = m[3]
            self.max_char_len = max(self.max_char_len, len(gt))
            if skip_pad:
                pad_above = 0
                pad_below = 0
            else:
                pad_above = int(m[4])
                pad_below = int(m[5])

            self.authors[author].append((path, gt, pad_above, pad_below))

        short = config['short'] if 'short' in config else False
        for author, lines in self.authors.items():
            # One entry per full batch of consecutive line numbers.
            for i in range(len(lines) // self.batch_size):
                ls = []
                for n in range(self.batch_size):
                    ls.append(self.batch_size * i + n)
                inst = (author, ls)
                self.lineIndex.append(inst)
                if short and i >= short:
                    break
            # A final batch made of the leftover lines (taken from the end),
            # filled up with lines repeated from the start of the author.
            # NOTE(review): when nothing is left over this still appends one
            # batch made entirely of repeated lines -- confirm that is wanted.
            leftover = len(lines) % self.batch_size
            fill = self.batch_size - leftover
            last = []
            for i in range(fill):
                last.append(i % len(lines))
            for i in range(leftover):
                last.append(len(lines) - (1 + i))
            self.lineIndex.append((author, last))

        self.fg_masks_dir = config[
            'fg_masks_dir'] if 'fg_masks_dir' in config else None
        self.warning = False

        if self.fg_masks_dir is not None:
            if self.fg_masks_dir[-1] == '/':
                self.fg_masks_dir = self.fg_masks_dir[:-1]
            # The mask directory is specific to the configured max_width.
            self.fg_masks_dir += '_{}'.format(self.max_width)
            ensure_dir(self.fg_masks_dir)
            for author, lines in self.lineIndex:
                for line in lines:
                    img_path, gt, pad_above, pad_below = self.authors[author][
                        line]
                    fg_path = os.path.join(self.fg_masks_dir,
                                           '{}_{}.png'.format(author, line))
                    if not os.path.exists(fg_path):
                        img = cv2.imread(img_path, 0)  #read as grayscale
                        if img is None:
                            continue

                        # A negative pad amount means "crop that many rows".
                        if pad_above < 0:
                            img = img[-pad_above:, :]
                            pad_above = 0
                        if pad_below < 0:
                            img = img[:pad_below, :]
                            pad_below = 0
                        img = np.pad(img,
                                     ((pad_above, pad_below), (10, 10)),
                                     'constant',
                                     constant_values=255)

                        if img.shape[0] != self.img_height:
                            if img.shape[
                                    0] < self.img_height and not self.warning:
                                self.warning = True
                                print("WARNING: upsampling image to fit size")
                            percent = float(self.img_height) / img.shape[0]
                            if img.shape[1] * percent > self.max_width:
                                percent = self.max_width / img.shape[1]
                            img = cv2.resize(img, (0, 0),
                                             fx=percent,
                                             fy=percent,
                                             interpolation=cv2.INTER_CUBIC)
                            if img.shape[0] < self.img_height:
                                diff = self.img_height - img.shape[0]
                                img = np.pad(
                                    img, ((diff // 2, diff // 2 + diff % 2),
                                          (0, 0)),
                                    'constant',
                                    constant_values=255)

                        # Otsu threshold, inverted, then dilated.
                        th, binarized = cv2.threshold(
                            img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                        binarized = 255 - binarized
                        ele = cv2.getStructuringElement(
                            cv2.MORPH_ELLIPSE, (9, 9))
                        binarized = cv2.dilate(binarized, ele)
                        cv2.imwrite(fg_path, binarized)
                        print('saved fg mask: {}'.format(fg_path))

        char_set_path = config['char_file']
        with open(char_set_path) as f:
            char_set = json.load(f)
        self.char_to_idx = char_set['char_to_idx']
        self.augmentation = config[
            'augmentation'] if 'augmentation' in config else None

        #DEBUG
        if 'overfit' in config and config['overfit']:
            self.lineIndex = self.lineIndex[:10]

        self.center = False  #config['center_pad'] #if 'center_pad' in config else True

        if 'style_loc' in config:
            by_author_styles = defaultdict(list)
            by_author_all_ids = defaultdict(set)
            style_loc = config['style_loc']
            if style_loc[-1] != '*':
                style_loc += '*'
            all_style_files = glob(style_loc)
            assert (len(all_style_files) > 0)
            for loc in all_style_files:
                # NOTE: pickle.load can run arbitrary code contained in the
                # file; only point 'style_loc' at trusted files.
                with open(loc, 'rb') as f:
                    styles = pickle.load(f)
                for i in range(len(styles['authors'])):
                    by_author_styles[styles['authors'][i]].append(
                        (styles['styles'][i], styles['ids'][i]))
                    by_author_all_ids[styles['authors'][i]].update(
                        styles['ids'][i])

            # For every id of an author keep only the styles whose own id
            # collection does not contain that id.
            self.styles = defaultdict(lambda: defaultdict(list))
            for author in by_author_styles:
                for sample_id in by_author_all_ids[author]:
                    for style, ids in by_author_styles[author]:
                        if sample_id not in ids:
                            self.styles[author][sample_id].append(style)

            for author in self.authors:
                assert (author in self.styles)
        else:
            self.styles = None

        if 'spaced_loc' in config:
            # NOTE: same pickle caveat as above -- trusted files only.
            with open(config['spaced_loc'], 'rb') as f:
                self.spaced_by_name = pickle.load(f)
        else:
            self.spaced_by_name = None

        self.mask_post = config['mask_post'] if 'mask_post' in config else []
        self.mask_random = config[
            'mask_random'] if 'mask_random' in config else False
Example #36
0
    def __getitem__(self, index):
        """Load one image with its labels, padded to a square and resized.

        The image at ``index`` (modulo the number of image files) is padded
        with the value 128 along its shorter side until it is square, scaled
        to [0, 1], resized to ``self.img_shape`` and returned channels-first
        as a float tensor. Files that do not load as 3-D arrays are skipped
        by moving on to the following indices.

        If the label file exists, columns 1-4 of its rows (treated as
        normalised centre x, centre y, width and height) are re-expressed
        relative to the padded image. At most ``self.max_objects`` rows are
        kept; unused rows stay zero.

        Returns:
            ``(img_path, input_img, filled_labels)``.

        Raises:
            RuntimeError: if none of the image files loads as a 3-D array.
        """
        #---------
        #  Image
        #---------

        n_files = len(self.img_files)
        img_path = self.img_files[index % n_files].rstrip()
        img = np.array(Image.open(img_path))

        # Handles images with less than three channels. The number of skips
        # is bounded so a file list without any usable image cannot loop
        # forever.
        skipped = 0
        while len(img.shape) != 3:
            skipped += 1
            if skipped >= n_files:
                raise RuntimeError(
                    'no image with three dimensions found in img_files')
            index += 1
            img_path = self.img_files[index % n_files].rstrip()
            img = np.array(Image.open(img_path))

        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
        padded_h, padded_w, _ = input_img.shape
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        #---------
        #  Label
        #---------

        # ``index`` may have advanced above, so the labels belong to the image
        # that was actually loaded.
        label_path = self.label_files[index % len(self.img_files)].rstrip()

        labels = None
        if os.path.exists(label_path):
            labels = np.loadtxt(label_path).reshape(-1, 5)
            # Extract coordinates for unpadded + unscaled image
            x1 = w * (labels[:, 1] - labels[:, 3]/2)
            y1 = h * (labels[:, 2] - labels[:, 4]/2)
            x2 = w * (labels[:, 1] + labels[:, 3]/2)
            y2 = h * (labels[:, 2] + labels[:, 4]/2)
            # Adjust for added padding
            x1 += pad[1][0]
            y1 += pad[0][0]
            x2 += pad[1][0]
            y2 += pad[0][0]
            # Calculate ratios from coordinates
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h
        # Fill matrix
        filled_labels = np.zeros((self.max_objects, 5))
        if labels is not None:
            kept = labels[:self.max_objects]
            filled_labels[:len(kept)] = kept
        filled_labels = torch.from_numpy(filled_labels)

        return img_path, input_img, filled_labels
Example #37
0
def bilateralfilter(image, texture, sigma_s, sigma_r):
    """Joint (cross) bilateral filter of ``image`` guided by ``texture``.

    Every output pixel is a weighted mean of the ``(2r+1) x (2r+1)`` window
    around it, with ``r = ceil(3 * sigma_s)``.  The weight of a neighbour is
    the product of a spatial Gaussian (``sigma_s``) and a range Gaussian
    (``sigma_r``) evaluated on the *texture* difference between the
    neighbour and the window centre.  For a multi-channel texture the range
    term is the product of one Gaussian per channel.

    Args:
        image: 2-D ``(h, w)`` or 3-D ``(h, w, channels)`` array to filter.
        texture: 2-D or 3-D guidance array with the same height and width
            as ``image``.  It is cast to ``int32`` and absolute differences
            index a 256-entry lookup table, so values are expected to lie
            in ``0..255``.
        sigma_s: Standard deviation of the spatial Gaussian, in pixels.
        sigma_r: Standard deviation of the range Gaussian, in texture units.

    Returns:
        An array with the shape and dtype of ``image`` holding the filtered
        result.  If the inputs are invalid (unsupported ``ndim`` or
        mismatched height/width) a message is printed and ``image`` itself
        is returned unchanged.
    """
    r = int(np.ceil(3 * sigma_s))

    # Validate both inputs before doing any work.
    if image.ndim == 3:
        pad_image = ((r, r), (r, r), (0, 0))
    elif image.ndim == 2:
        pad_image = ((r, r), (r, r))
    else:
        print('Input image is not valid!')
        return image
    h, w = image.shape[0], image.shape[1]

    if texture.ndim == 3:
        pad_texture = ((r, r), (r, r), (0, 0))
    elif texture.ndim == 2:
        pad_texture = ((r, r), (r, r))
    else:
        # Previously this case fell through and crashed on an unbound name.
        print('Guidance image is not valid!')
        return image
    if texture.shape[0] != h or texture.shape[1] != w:
        print('The guidance image is not aligned with input image!')
        return image

    # Symmetric padding mirrors the border (edge value repeated) so that the
    # full window is available for pixels on the image boundary.
    I = np.pad(image, pad_image, 'symmetric').astype(np.float32)
    T = np.pad(texture, pad_texture, 'symmetric').astype(np.int32)

    output = np.zeros_like(image)

    # exp(-d^2 / (2 * sigma^2)) scale factors.
    scale_s = 1 / (2 * sigma_s * sigma_s)
    scale_r = 1 / (2 * sigma_r * sigma_r)
    # Range kernel lookup table indexed by absolute texture difference.
    LUT = np.exp(-np.arange(256) * np.arange(256) * scale_r)
    # Spatial Gaussian on a grid centred at 0 (offsets -r .. r).
    gx, gy = np.meshgrid(np.arange(2 * r + 1) - r, np.arange(2 * r + 1) - r)
    kernel_s = np.exp(-(gx * gx + gy * gy) * scale_s)

    for y in range(r, r + h):
        rows = slice(y - r, y + r + 1)
        for x in range(r, r + w):
            cols = slice(x - r, x + r + 1)

            # Range weight from the guidance image, then the spatial weight.
            if T.ndim == 2:
                wgt = LUT[np.abs(T[rows, cols] - T[y, x])] * kernel_s
            else:
                wgt = LUT[np.abs(T[rows, cols, 0] - T[y, x, 0])]
                for c in range(1, T.shape[2]):
                    wgt = wgt * LUT[np.abs(T[rows, cols, c] - T[y, x, c])]
                wgt = wgt * kernel_s
            wacc = np.sum(wgt)

            # Normalised weighted mean, applied per image channel.
            if I.ndim == 2:
                output[y - r, x - r] = np.sum(wgt * I[rows, cols]) / wacc
            else:
                for c in range(I.shape[2]):
                    output[y - r, x - r, c] = np.sum(
                        wgt * I[rows, cols, c]) / wacc

    return output
Example #38
0
    #convert to numpy array
    x1 = np.array(samples).astype("complex64")
    # mix down with fc
    fc1 = np.exp(-1.0j * 2.0 * np.pi * F_offset / Fs * np.arange(len(x1)))

    x2 = x1 * fc1

    f_bw = 200000
    Inp1 = x2.real * np.cos(2.0 * np.pi * Fc * np.arange(len(x2)))
    Qa1 = -1 * x2.imag * np.sin(2.0 * np.pi * Fc * np.arange(len(x2)))

    Inp2 = np.convolve(Inp1, h)
    Qa2 = np.convolve(Qa1, h)

    delta_T = 155
    Inp2_delay = np.pad(Inp2, (155, 0), 'constant', constant_values=(0, 0))
    Qa2_delay = np.pad(Qa2, (155, 0), 'constant', constant_values=(0, 0))
    Inp3 = np.pad(Inp2, (0, 155), 'constant', constant_values=(0, 0))
    Qa3 = np.pad(Qa2, (0, 155), 'constant', constant_values=(0, 0))
    D = Qa3 * Inp2_delay - Inp3 * Qa2_delay
    y = []
    d = []
    k = 0
    for x in D:
        k += 1
        if x > 0:
            y.append(1)
        else:
            y.append(0)
        if k == 30:
            k = 0
Example #39
0
def pad(img, padding, fill=0, padding_mode='constant'):
    """Pad the given PIL Image on all sides with specified padding mode and fill value.

    Args:
        img (PIL Image): Image to be padded.
        padding (int or tuple): Padding on each border. If a single int is provided this
            is used to pad all borders. If tuple of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a tuple of length 4 is provided
            this is the padding for the left, top, right and bottom borders
            respectively.
        fill: Pixel fill value for constant fill. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant
            or square_constant.
        padding_mode: Type of padding. Should be: constant, edge, reflect, symmetric
            or square_constant. Default is constant.
            constant: pads with a constant value, this value is specified with fill
            edge: pads with the last value on the edge of the image
            reflect: pads with reflection of image (without repeating the last value on the edge)
                padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
                will result in [3, 2, 1, 2, 3, 4, 3, 2]
            symmetric: pads with reflection of image (repeating the last value on the edge)
                padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
                will result in [2, 1, 1, 2, 3, 4, 4, 3]
            square_constant: overrides padding to pad smallest edge with constant value,
                specified with fill, to make a square image. 2D or 3D imgs only.

    Returns:
        PIL Image: Padded image.

    Raises:
        TypeError: If ``img`` is not a PIL Image or an argument has the wrong type.
        ValueError: If ``padding`` is a tuple whose length is neither 2 nor 4.
        AssertionError: If ``padding_mode`` is not one of the supported modes.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if not isinstance(padding, (numbers.Number, tuple)):
        raise TypeError('Got inappropriate padding arg')
    if not isinstance(fill, (numbers.Number, str, tuple)):
        raise TypeError('Got inappropriate fill arg')
    if not isinstance(padding_mode, str):
        raise TypeError('Got inappropriate padding_mode arg')

    # After the type check above the only sequence type that can reach this
    # point is ``tuple``, so no ``collections`` ABC lookup is needed (the
    # ``collections.Sequence`` alias no longer exists on Python >= 3.10).
    if isinstance(padding, tuple) and len(padding) not in [2, 4]:
        raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " +
                         "{} element tuple".format(len(padding)))

    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric', 'square_constant'], \
        'Padding mode should be either constant, square_constant, edge, reflect or symmetric'

    if padding_mode == 'constant':
        return ImageOps.expand(img, border=padding, fill=fill)

    if padding_mode == 'square_constant':
        if len(img.size) > 3 or len(img.size) < 2:
            raise ValueError("padding_mode == 'square_constant' valid only for 2D or 3D images")
        im_size = img.size if len(img.size) == 2 else img.size[:-1]
        # NOTE(review): the same amount is added on both sides of the short
        # edge, so an odd size difference leaves the result one pixel short
        # of square.
        pad_amt = (np.max(im_size) - np.min(im_size)) // 2
        border = [0, 0]
        # Plain Python ints keep the border tuple free of numpy scalar types.
        border[int(np.argmin(im_size))] = int(pad_amt)
        return ImageOps.expand(img, border=tuple(border), fill=fill)

    # edge / reflect / symmetric are delegated to numpy.
    if isinstance(padding, tuple):
        if len(padding) == 2:
            pad_left = pad_right = padding[0]
            pad_top = pad_bottom = padding[1]
        else:
            pad_left, pad_top, pad_right, pad_bottom = padding
    else:
        # Any scalar that passed validation (not only exact ``int``) is used
        # for all four borders, so the pad_* names are always bound.
        pad_left = pad_right = pad_top = pad_bottom = padding

    img = np.asarray(img)
    if len(img.shape) == 3:
        # RGB image: never pad the channel axis.
        img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), padding_mode)
    elif len(img.shape) == 2:
        # Grayscale image
        img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode)

    return Image.fromarray(img)
 def apply(self, data):
     """Build one flat feature vector from a multichannel recording.

     ``data`` is transposed, zero-padded along its first axis up to
     ``self.splitsize`` rows and then treated as ``(nt, nc)``
     (presumably samples x channels -- confirm against the caller).
     The ``self.with_*`` / ``self.smooth`` / ``self.onlyfd_dfa`` flags
     select which feature groups are appended to ``feat``; the result is
     ``np.concatenate(feat, axis=0)``.

     The sampling rate is hard-coded to 400 (taken to be Hz) unless
     ``self.smooth`` is set, in which case the data is resampled and
     ``self.smooth_Hz`` is used instead.
     """
     lvl = np.array([0.1, 4, 8, 12, 30, 70, 180])  # Frequency levels in Hz
     data = data.transpose()
     # Zero-pad the end of the first axis up to splitsize rows; np.pad raises
     # if data already has more rows than that (negative pad width).
     data = np.pad(data,((0,self.splitsize -data.shape[0]),(0,0)),'constant')
     nt, nc =data.shape
     fs = 400
     feat = []
     if self.smooth:
         # Resample to nt/fs*smooth_Hz rows and drop the top frequency band edge.
         data=resample(data,int(nt/fs*self.smooth_Hz))
         nt, nc = data.shape
         fs = self.smooth_Hz
         lvl = np.array([0.1, 4, 8, 12, 30, 70])
     if self.with_six or self.with_dy:
         # Magnitude spectrum per channel, normalised to sum to 1.
         D = np.absolute(np.fft.rfft(data, axis=0))
         D[0, :] = 0  # set the DC component to zero
         for i in range(nc):
             D[:,i] /= D[:,i].sum()  # Normalize each channel
         # Eigenvalue features of the cross-channel spectral correlation matrix.
         coorD = np.corrcoef(D.transpose())
         w = Eigenvalues().apply(coorD)
         # Spectral edge: frequency at which the cumulative spectrum (up to
         # tfreq) is closest to ppow times its maximum.
         tfreq = self.power_edge
         ppow = 0.5
         # top_freq = int(round(nt / sfreq * tfreq)) + 1
         top = int(round(nt / fs * tfreq))
         spedge = np.cumsum(D[:top, :], axis=0)
         spedge = np.argmin(np.abs(spedge - (spedge.max(axis=0) * ppow)), axis=0)
         spedge = spedge / top * tfreq
         feat.append(w)
         feat.append(spedge.ravel())
     if self.with_six:
         # Summed spectrum in the bands delimited by lvl, plus its entropy.
         lseg = np.round(nt / fs * lvl).astype('int')
         sixspect = np.zeros((len(lvl) - 1, nc))
         for j in range(len(sixspect)):
             sixspect[j, :] = 2 * np.sum(D[lseg[j]:lseg[j + 1], :], axis=0)
         # NOTE(review): np.log of an empty (zero) band yields -inf/nan here.
         spentropy = -1 * np.sum(np.multiply(sixspect, np.log(sixspect)), axis=0)
         feat.append(sixspect.ravel())
         feat.append(spentropy.ravel())
     if self.with_dy:
         # Dyadic bands: repeatedly halve the upper index, from the top band down.
         ldat = int(floor(nt / 2.0))
         no_levels = int(floor(log(ldat, 2.0)))
         dspect = np.zeros((no_levels, nc))
         for j in range(no_levels - 1, -1, -1):
             dspect[j, :] = 2 * np.sum(D[int(floor(ldat / 2.0)):ldat, :], axis=0)
             ldat = int(floor(ldat / 2.0))
         spentropyDyd = -1 * np.sum(np.multiply(dspect, np.log(dspect)), axis=0)
         feat.append(dspect.ravel())
         feat.append(spentropyDyd.ravel())
     if self.with_mc:
         # NOTE(review): the numerator np.std has no axis argument, so it is a
         # single scalar over all channels, unlike the per-channel denominator.
         mobility = np.divide(
             np.std(np.diff(data, axis=0)),
             np.std(data, axis=0))
         complexity = np.divide(np.divide(
             # std of second derivative for each channel
             np.std(np.diff(np.diff(data, axis=0), axis=0), axis=0),
             # std of first derivative for each channel
             np.std(np.diff(data, axis=0), axis=0))
             , mobility)
         feat.append(mobility)
         feat.append(complexity)
     if self.with_time_corr:
         data1 = TimeCorrelation(self.max_hz, self.scale_option).apply(data.transpose())
         feat.append(data1)
     if self.with_equal_freq:
         data2 = FreqCorrelation(self.start, self.end, self.scale_option, self.resample_size, with_fft=True,
                                 with_corr=True).apply(data.transpose())
         feat.append(data2)
     if self.onlyfd_dfa:
         # Per-channel fractal dimensions (pyeeg.pfd / pyeeg.hfd) and DFA.
         fd = np.zeros((2, nc))
         for j in range(nc):
             fd[0, j] = pyeeg.pfd(data[:, j])
             fd[1, j] = pyeeg.hfd(data[:, j], 3)
         DFA = np.zeros(nc)
         for j in range(nc):
             DFA[j] = pyeeg.dfa(data[:, j])
             # NOTE(review): this sits inside the loop and rebinds feat from a
             # list to a single ndarray on every iteration, discarding any
             # features collected above -- confirm this is intended.
             feat=np.concatenate((
                 fd.ravel(),
                 DFA.ravel(),
                 np.sqrt(DFA).ravel(),
                 np.square(DFA.ravel()),
                 np.sqrt(fd).ravel(),
                 np.square(fd).ravel(),
             ))
     if self.with_square or self.with_log or self.with_sqrt:
         # Optional element-wise transforms of the absolute feature values
         # collected so far, appended as extra features.
         tmp = np.concatenate(feat, axis=0)
         tmp = np.absolute(tmp)
         if self.with_square:
             feat.append(np.square(tmp))
         if self.with_log:
             feat.append(np.log(tmp))
         if self.with_sqrt:
             feat.append(np.sqrt(tmp))
     return np.concatenate(feat, axis=0)
Example #41
0
def nms_fast(in_corners, H, W, dist_thresh):
  """
  Approximate grid-based Non-Max-Suppression for corners given as a
  3xN array of columns [x_i, y_i, conf_i]^T.

  Every corner is rounded to an integer pixel and marked on an HxW grid.
  Corners are then visited from highest to lowest confidence; a corner that
  is still marked is kept and everything inside the square of half-width
  dist_thresh around it is cleared.

  Grid values:
     1 : corner waiting to be processed.
     0 : empty or suppressed.
    -1 : kept.

  NOTE: because of the rounding the effective suppression distance may
  differ slightly from dist_thresh. Points are assumed to lie inside the
  image.

  Inputs
    in_corners - 3xN numpy array with corners [x_i, y_i, confidence_i]^T.
    H - Image height.
    W - Image width.
    dist_thresh - Suppression distance (infinity norm), in pixels.
  Returns
    nmsed_corners - 3xM numpy array of surviving corners, sorted by
      decreasing confidence.
    nmsed_inds - length-M vector with the indices of the survivors in
      in_corners.
  """
  occupancy = np.zeros((H, W)).astype(int)  # NMS state per pixel.
  index_map = np.zeros((H, W)).astype(int)  # Sorted-corner index per pixel.
  # Highest confidence first; pixel positions are the rounded coordinates.
  order = np.argsort(-in_corners[2,:])
  corners = in_corners[:,order]
  rcorners = corners[:2,:].round().astype(int)
  n_pts = rcorners.shape[1]
  # Trivial inputs: nothing to suppress.
  if n_pts == 0:
    return np.zeros((3,0)).astype(int), np.zeros(0).astype(int)
  if n_pts == 1:
    single = np.vstack((rcorners, in_corners[2])).reshape(3,1)
    return single, np.zeros((1)).astype(int)
  # Mark every rounded corner; a later corner on the same pixel overwrites
  # the stored index.
  for idx, (col, row) in enumerate(rcorners.T):
    occupancy[row, col] = 1
    index_map[row, col] = idx
  # Border padding lets the suppression window run past the image edge.
  pad = dist_thresh
  occupancy = np.pad(occupancy, ((pad,pad), (pad,pad)), mode='constant')
  for col, row in rcorners.T:
    py, px = row + pad, col + pad  # Shift into padded coordinates.
    if occupancy[py, px] != 1:
      continue  # Already suppressed (or already handled).
    occupancy[py-pad:py+pad+1, px-pad:px+pad+1] = 0
    occupancy[py, px] = -1
  # Collect the kept pixels and map them back to corner indices.
  keepy, keepx = np.where(occupancy==-1)
  inds_keep = index_map[keepy - pad, keepx - pad]
  survivors = corners[:, inds_keep]
  by_conf = np.argsort(-survivors[-1, :])
  return survivors[:, by_conf], order[inds_keep[by_conf]]
def padding(_img, padd):
    """Return a copy of the 2-D array ``_img`` with ``padd`` zeros added on every side."""
    border = (padd, padd)
    return np.pad(_img, pad_width=[border, border], mode='constant', constant_values=0)
Example #43
0
    def sde(self):
        """
        Combine the SDE representations of all parts into one.

        Each element of ``self.parts`` supplies the tuple
        ``(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)`` through its own
        ``sde()``.  The 2-D matrices are joined block-diagonally (``H`` is
        stacked horizontally) and the 3-D derivative arrays are joined
        block-diagonally along all three axes.  The combined tuple is
        returned in the same order after its dimensions have been checked.
        """

        import scipy.linalg as la

        def _join_2d(acc, blk):
            # The first part is taken as-is, later ones go on the diagonal.
            return blk if acc is None else la.block_diag(acc, blk)

        def _join_3d(acc, blk):
            # Grow acc by blk's extent on every axis and drop blk into the
            # freshly added far corner.
            if acc is None:
                return blk
            d0, d1, d2 = blk.shape[0], blk.shape[1], blk.shape[2]
            grown = np.pad(acc, ((0, d0), (0, d1), (0, d2)),
                           'constant', constant_values=0)
            grown[-d0:, -d1:, -d2:] = blk
            return grown

        F = L = Qc = H = Pinf = P0 = None
        dF = dQc = dPinf = dP0 = None
        n = nq = nd = 0

        # Assign models
        for part in self.parts:
            (Ft, Lt, Qct, Ht, Pinft, P0t, dFt, dQct, dPinft, dP0t) = part.sde()

            F = _join_2d(F, Ft)
            L = _join_2d(L, Lt)
            Qc = _join_2d(Qc, Qct)
            H = Ht if H is None else np.hstack((H, Ht))
            Pinf = _join_2d(Pinf, Pinft)
            P0 = _join_2d(P0, P0t)

            dF = _join_3d(dF, dFt)
            dQc = _join_3d(dQc, dQct)
            dPinf = _join_3d(dPinf, dPinft)
            dP0 = _join_3d(dP0, dP0t)

            # Running totals used by the dimension checks below.
            n += Ft.shape[0]
            nq += Qct.shape[0]
            nd += dFt.shape[2]

        assert F.shape[0] == n and F.shape[1] == n, \
            "SDE add: Check of F Dimensions failed"
        assert L.shape[0] == n and L.shape[1] == nq, \
            "SDE add: Check of L Dimensions failed"
        assert Qc.shape[0] == nq and Qc.shape[1] == nq, \
            "SDE add: Check of Qc Dimensions failed"
        assert H.shape[0] == 1 and H.shape[1] == n, \
            "SDE add: Check of H Dimensions failed"
        assert Pinf.shape[0] == n and Pinf.shape[1] == n, \
            "SDE add: Check of Pinf Dimensions failed"
        assert P0.shape[0] == n and P0.shape[1] == n, \
            "SDE add: Check of P0 Dimensions failed"
        assert dF.shape[0] == n and dF.shape[1] == n and dF.shape[2] == nd, \
            "SDE add: Check of dF Dimensions failed"
        assert dQc.shape[0] == nq and dQc.shape[1] == nq and dQc.shape[2] == nd, \
            "SDE add: Check of dQc Dimensions failed"
        assert dPinf.shape[0] == n and dPinf.shape[1] == n and dPinf.shape[2] == nd, \
            "SDE add: Check of dPinf Dimensions failed"
        assert dP0.shape[0] == n and dP0.shape[1] == n and dP0.shape[2] == nd, \
            "SDE add: Check of dP0 Dimensions failed"

        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
def dice_images(
    sliceSavePath,
    imageWidth,
    imageHeight,
    pixelPitch,
    x_boundries,
    y_boundries,
    overlap,
    progress_handle=None,
):
    """Cut every PNG slice in a directory into padded tiles.

    For each ``*.png`` directly inside ``sliceSavePath`` the image is cut at
    the given x/y boundaries (consecutive tiles share ``overlap`` pixels),
    each tile is zero-padded and centred to ``printer.width`` x
    ``printer.height`` and written as a greyscale image to
    ``<sliceSavePath>/diced_images``.  The original file is then moved to
    ``<sliceSavePath>/full_sized_images``.  Finally the dicing parameters
    are written to ``<sliceSavePath>/stitching_info.json``.

    Args:
        sliceSavePath: Directory holding the slice images (``str`` or
            ``pathlib.Path``).
        imageWidth: Currently unused.
        imageHeight: Currently unused.
        pixelPitch: Stored verbatim in ``stitching_info.json``.
        x_boundries: Right-hand tile boundaries in pixels; entries equal to
            0 are skipped.
        y_boundries: Bottom tile boundaries in pixels; entries equal to 0
            are skipped.
        overlap: Number of pixels shared between neighbouring tiles.
        progress_handle: Optional callable invoked as
            ``progress_handle("Dicing images", done, total)`` after every
            image.

    NOTE(review): the target tile size comes from a ``printer`` object that
    is not defined in this function (presumably a module-level global),
    not from ``imageWidth`` / ``imageHeight`` -- confirm this is intended.
    """
    # Normalise once so that plain string paths work everywhere below; the
    # glob pattern and the JSON path previously required a Path instance.
    sliceSavePath = Path(sliceSavePath)
    dicedSavePath = sliceSavePath / "diced_images"
    fullSavePath = sliceSavePath / "full_sized_images"

    dicedSavePath.mkdir(exist_ok=True)
    fullSavePath.mkdir(exist_ok=True)

    images = glob.glob(str(sliceSavePath / "*.png"))
    image_count = len(images)
    for i, image_path in enumerate(images):
        image_path = Path(image_path)
        filename = image_path.stem
        extention = image_path.suffix
        img = np.array(Image.open(image_path))

        last_x = 0
        for j, x in enumerate(x_boundries):
            x = int(x)
            if x == 0:
                continue
            last_y = 0
            for k, y in enumerate(y_boundries):
                y = int(y)
                if y == 0:
                    continue
                sub_img = img[last_y:y, last_x:x]

                # Centre the tile on the target canvas; any odd leftover
                # pixel goes to the bottom / right side.
                pad_x = printer.width - (x - last_x)
                pad_y = printer.height - (y - last_y)
                sub_img = np.pad(
                    sub_img,
                    (
                        (pad_y // 2, pad_y - (pad_y // 2)),
                        (pad_x // 2, pad_x - (pad_x // 2)),
                    ),
                )

                sub_img = Image.fromarray(sub_img).convert("L")
                sub_img.save(
                    dicedSavePath / f"{filename}_stitch_x{j-1}_y{k-1}{extention}"
                )
                # The next tile starts `overlap` pixels before this one ends.
                last_y = y - overlap
            last_x = x - overlap

        # Move the processed original out of the working directory.
        image_path.replace(fullSavePath / f"{filename}{extention}")
        if progress_handle is not None:
            progress_handle("Dicing images", i + 1, image_count)

    with open(sliceSavePath / "stitching_info.json", "w") as file:
        info = {
            "pixel_pitch": pixelPitch,
            "x_boundries": x_boundries,
            "y_boundries": y_boundries,
            "overlap": overlap,
        }
        json.dump(info, file)
Example #45
0
# test_data_path = 'F:/data/private/speech/vctk/qts/p225_001.npy'
# target_speaker_ivec = ivecs[speaker2id(target_speaker)]
receptive_field = hp.dilations[-1] * hp.size
speaker_emb = tf.placeholder(dtype=tf.float32,
                             shape=(hp.batch_size, len(hp.speakers)))
_input = tf.placeholder(dtype=tf.float32,
                        shape=(hp.batch_size, receptive_field, hp.Q))
_z_q = tf.placeholder(dtype=tf.float32,
                      shape=(hp.batch_size, receptive_field, hp.D))
x = decoder(_input, speaker_emb, _z_q, is_training=False)
out = tf.multinomial(tf.squeeze(x, 1), num_samples=1, output_dtype=tf.int32)
for j in range(0, len(test_data_paths)):
    test_qt = np.load(test_data_paths[j][0])
    test_speaker = test_data_paths[j][1]
    x = np.pad(test_qt, ([0, hp.T], [0, 0]),
               mode="constant",
               constant_values=0)[:hp.T, :]
    x = np.expand_dims(x, 0)
    test_speaker = np.expand_dims(test_speaker, 0)
    # target_speaker_ivec = np.expand_dims(target_speaker_ivec,0)
    z_q, x, _speaker_emb = sess.run([g.z_q, g.encoder_inputs, g.speakers],
                                    feed_dict={
                                        g.x: x,
                                        g.speaker_id: test_speaker
                                    })  #shape = (B,T,K)
    output = np.squeeze(x)
    inputs = x[:, :receptive_field, :]  # (B,r,Q)
    # decode and get multinomial distribuition

    for i in tqdm(range(hp.T - receptive_field - 1),
                  total=hp.T - receptive_field,
Example #46
0
def zeropad_to_max_len(data, max_len=121):
    """Zero-pad the end of axis 1 of a 3-D array so that it has length ``max_len``."""
    missing = max_len - data.shape[1]
    widths = [(0, 0), (0, missing), (0, 0)]
    return np.pad(data, widths, mode="constant")
Example #47
0
def load_umc_sheets(data_dir="/home/matthias/Data/umc_mozart", require_performance=False):
    """Load all pieces below ``data_dir`` as unwrapped sheet images.

    For every piece directory the pages in ``sheet/*.png`` are read, the
    systems on each page are detected with the OMR networks, and a strip of
    ``SYSTEM_HEIGHT`` rows around every system is cut out.  The strips of
    one piece are concatenated horizontally into a single "unwrapped"
    image.

    Args:
        data_dir: Directory containing one sub-directory per piece.
        require_performance: If True, pieces without a ``*performance*``
            entry in their directory are skipped.

    Returns:
        Tuple ``(piece_names, piece_paths, unwrapped_sheets)`` of equally
        long lists.  Pieces for which system detection failed on any page
        are left out.
    """
    import glob
    import cv2

    # initialize omr system
    from omr.omr_app import OpticalMusicRecognizer
    from omr.utils.data import prepare_image
    from lasagne_wrapper.network import SegmentationNetwork

    from omr.models import system_detector, bar_detector

    net = system_detector.build_model()
    system_net = SegmentationNetwork(net, print_architecture=False)
    system_net.load('sheet_utils/omr_models/system_params.pkl')

    net = bar_detector.build_model()
    bar_net = SegmentationNetwork(net, print_architecture=False)
    bar_net.load('sheet_utils/omr_models/bar_params.pkl')

    piece_names = []
    unwrapped_sheets = []
    piece_paths = []

    # get list of all pieces
    piece_dirs = np.sort(glob.glob(os.path.join(data_dir, '*')))
    n_pieces = len(piece_dirs)

    # iterate pieces
    kept_pages = 0
    for i_piece, piece_dir in enumerate(piece_dirs):
        # basename handles the platform's path separator, unlike split('/').
        piece_name = os.path.basename(piece_dir)

        print(col.print_colored("Processing piece %d of %d (%s)" % (i_piece + 1, n_pieces, piece_name), col.OKBLUE))

        # check if there is a performance
        if require_performance and len(glob.glob(os.path.join(piece_dir, "*performance*"))) == 0:
            print("No performance found!")
            continue

        # load pages
        page_paths = np.sort(glob.glob(os.path.join(piece_dir, "sheet/*.png")))
        if len(page_paths) == 0:
            print("No sheet available!!!")
            continue

        unwrapped_sheet = np.zeros((SYSTEM_HEIGHT, 0), dtype=np.uint8)
        system_problem = False
        for i_page, page_path in enumerate(page_paths):
            # NOTE: pages are counted before detection, so pages of pieces
            # that are later dropped are included in the total.
            kept_pages += 1

            # load sheet image (flag 0 = greyscale)
            I = cv2.imread(page_path, 0)

            # detect systems
            I_prep = prepare_image(I)
            omr = OpticalMusicRecognizer(note_detector=None, system_detector=system_net, bar_detector=bar_net)

            try:
                page_systems = omr.detect_systems(I_prep, verbose=False)
            except Exception:
                # Best effort: flag the piece and move on.  Catching
                # Exception (not a bare except) lets Ctrl-C and SystemExit
                # still stop the run.
                print("Problem in system detection!!!")
                system_problem = True
                continue

            # unwrap sheet
            for system in page_systems:

                # SYSTEM_HEIGHT rows centred between two of the system's
                # corner rows; columns span corner 0 to corner 1.
                r0 = int(np.mean([system[0, 0], system[2, 0]])) - SYSTEM_HEIGHT // 2
                r1 = r0 + SYSTEM_HEIGHT
                c0 = int(system[0, 1])
                c1 = int(system[1, 1])

                # fix row slice coordinates
                r0 = max(0, r0)
                r1 = min(r1, I.shape[0])
                r0 = max(r0, r1 - SYSTEM_HEIGHT)

                staff_img = I[r0:r1, c0:c1].astype(np.uint8)

                if staff_img.shape[0] < SYSTEM_HEIGHT:
                    to_pad = SYSTEM_HEIGHT - staff_img.shape[0]
                    # Refuse to invent more than 10% of the strip height.
                    if to_pad > (0.1 * SYSTEM_HEIGHT):
                        print("Problem in system padding!!!")
                        continue
                    staff_img = np.pad(staff_img, ((0, to_pad), (0, 0)), mode="edge")

                unwrapped_sheet = np.hstack((unwrapped_sheet, staff_img))

        if not system_problem:
            piece_names.append(piece_name)
            piece_paths.append(piece_dir)
            unwrapped_sheets.append(unwrapped_sheet)

    print("%d pieces covering %d pages of sheet music." % (len(piece_names), kept_pages))

    return piece_names, piece_paths, unwrapped_sheets
Example #48
0
def maux(output_text, num):
    """Speak ``output_text`` in the reference speaker's voice and save it as a wav.

    Steps, in order: check that CUDA is available, load the encoder,
    synthesizer and vocoder from hard-coded paths, run a short smoke test of
    all three, embed the reference recording, synthesize a mel spectrogram
    for ``output_text``, vocode it and write the waveform to disk.

    Args:
        output_text: Text to synthesize.
        num: Integer used to number the output file (formatted with ``%02d``).

    Returns:
        The output file name without directory or extension, i.e.
        ``"demo_output_%02d" % num``.

    Note:
        If CUDA is not available a message is printed to stderr and
        ``quit(-1)`` is called.
    """
    print("debug -- django")

    ## Print some environment information (for debugging purposes)
    print("Running a test of your configuration...\n")
    if not torch.cuda.is_available():
        print(
            "Your PyTorch installation is not configured to use CUDA. If you have a GPU ready "
            "for deep learning, ensure that the drivers are properly installed, and that your "
            "CUDA version matches your PyTorch installation. CPU-only inference is currently "
            "not supported.",
            file=sys.stderr)
        quit(-1)
    device_id = torch.cuda.current_device()
    gpu_properties = torch.cuda.get_device_properties(device_id)
    print(
        "Found %d GPUs available. Using GPU %d (%s) of compute capability %d.%d with "
        "%.1fGb total memory.\n" %
        (torch.cuda.device_count(), device_id, gpu_properties.name,
         gpu_properties.major, gpu_properties.minor,
         gpu_properties.total_memory / 1e9))

    ## Load the models one by one.
    print("Preparing the encoder, the synthesizer and the vocoder...")
    encoder.load_model(
        "D:/RemindMe/django-remindme/mysite/trained model/encoder/saved_models/pretrained.pt"
    )
    synthesizer = Synthesizer(
        "D:/RemindMe/django-remindme/mysite/trained model/synthesizer/saved_models/logs-pretrained/taco_pretrained",
        low_mem=False)
    vocoder.load_model(
        "D:/RemindMe/django-remindme/mysite/trained model/vocoder/saved_models/pretrained/pretrained.pt"
    )

    ## Run a test
    print("Testing your configuration with small inputs.")
    # Feed the encoder an all-zero waveform of encoder.sampling_rate samples
    # (one second of audio at the encoder's rate).
    print("\tTesting the encoder...")
    encoder.embed_utterance(np.zeros(encoder.sampling_rate))

    # Build a random, L2-normalized dummy embedding plus an all-zero one so the
    # synthesizer is exercised with a batch of two inputs.
    dummy_embed = np.random.rand(speaker_embedding_size)
    dummy_embed /= np.linalg.norm(dummy_embed)
    dummy_embeds = [dummy_embed, np.zeros(speaker_embedding_size)]
    dummy_texts = ["test 1", "test 2"]
    print(
        "\tTesting the synthesizer... (loading the model will output a lot of text)"
    )
    mels = synthesizer.synthesize_spectrograms(dummy_texts, dummy_embeds)

    # The vocoder synthesizes one waveform at a time, so the mel spectrograms
    # are concatenated into a single one along axis 1.
    mel = np.concatenate(mels, axis=1)
    print("\tTesting the vocoder...")
    # Deliberately tiny target/overlap values keep this smoke test short; the
    # no-op callback hides the vocoder's progress display.
    vocoder.infer_waveform(mel,
                           target=200,
                           overlap=50,
                           progress_callback=lambda *args: None)

    print("All test passed! You can now synthesize speech.\n\n")

    ## Speech generation
    print(
        "This is a GUI-less example of interface to SV2TTS. The purpose of this script is to "
        "show how you can interface this project easily with your own. See the source code for "
        "an explanation of what is happening.\n")

    print("Interactive generation loop")

    in_fpath = Path(
        "D:/RemindMe/django-remindme/mysite/trained model/sam_narration2.wav")
    # Load the reference recording once and hand the samples to the encoder's
    # preprocessing; a second, path-based preprocess_wav call would only
    # produce a value that is never used.
    original_wav, sampling_rate = librosa.load(in_fpath)
    preprocessed_wav = encoder.preprocess_wav(original_wav, sampling_rate)
    print("Loaded file succesfully")
    embed = encoder.embed_utterance(preprocessed_wav)
    print("Created the embedding")

    specs = synthesizer.synthesize_spectrograms([output_text], [embed])
    spec = specs[0]
    print("Created the mel spectrogram")

    ## Generating the waveform
    print("Synthesizing the waveform:")
    generated_wav = vocoder.infer_waveform(spec)

    ## Post-generation
    # Append synthesizer.sample_rate zero samples to the end of the waveform
    # (kept from the original demo, where it worked around playback being cut
    # short by sounddevice).
    generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate),
                           mode="constant")

    # Save it on the disk
    filexpath = "D:/RemindMe/django_remindme_model/mysite/media/demo_output_%02d.wav" % num
    fx = "demo_output_%02d" % num
    print(generated_wav.dtype)
    librosa.output.write_wav(filexpath, generated_wav.astype(np.float32),
                             synthesizer.sample_rate)

    print("\nSaved output as %s\n\n" % filexpath)

    return fx
Example #49
0
def append_tile(array, geom, tot_array, tot_geom):
    """
    Blend a single tile into a larger arrayset, in place.

    The tile is zero-padded to the size of tot_array, shifted to its position
    on the total detector and averaged with the data already present, using
    weights derived from distance transforms of the two footprints.
    Nothing is returned; tot_array is overwritten projection by projection.

    Args:

        array: projection stack of the tile
        geom: geometry description of the tile
        tot_array: output array (modified in place)
        tot_geom: output geometry

    """
    print('Stitching a tile...')

    # Detector-plane shapes (axes 0 and 2); axis 1 is compared below as the
    # number of projections.
    full_shape = tot_array.shape[::2]
    tile_shape = array.shape[::2]

    # Both arraysets must agree on pixel size and projection count:
    if numpy.abs(tot_geom['det_pixel'] - geom['det_pixel']) > 1e-6:
        raise Exception('This array has different detector pixels! %f v.s. %f. Aborting!' % (geom['det_pixel'], tot_geom['det_pixel']))

    if tot_array.shape[1] != array.shape[1]:
        raise Exception('This array has different number of projections from the others. %u v.s. %u. Aborting!' % (array.shape[1], tot_array.shape[1]))

    full_size = tot_geom.detector_size(full_shape)
    tile_size = geom.detector_size(tile_shape)

    # Tile offset measured from the top-left corner of the total detector,
    # converted to pixels:
    full_cy, full_cx = tot_geom.detector_centre()
    tile_cy, tile_cx = geom.detector_centre()

    x_offset = ((tile_cx - full_cx) + full_size[1] / 2 - tile_size[1] / 2) / geom.pixel[1]
    y_offset = ((tile_cy - full_cy) + full_size[0] / 2 - tile_size[0] / 2) / geom.pixel[0]

    x_offset = int(numpy.round(x_offset))
    y_offset = int(numpy.round(y_offset))

    # Amount of padding needed to bring a tile image to the total image size:
    pad_x = tot_array.shape[2] - array.shape[2]
    pad_y = tot_array.shape[0] - array.shape[0]

    # Refine the geometric offset with the residual shift found from the data:
    residual = _find_shift_(tot_array, array, [y_offset, x_offset])
    x_offset += residual[1]
    y_offset += residual[0]

    # Footprint of the existing data: non-zero mean over every 100th projection.
    base_mask = (tot_array[:, ::100, :].mean(1)) != 0

    # Footprint of the new tile, moved to its final position:
    tile_mask = numpy.zeros_like(base_mask)
    tile_mask[:tile_shape[0], :tile_shape[1]] = 1.0
    tile_mask = interp.shift(tile_mask, [y_offset, x_offset], order = 1)

    # Distance-transform weights, reduced by one and floored at zero to trim
    # the edges and avoid interpolation errors:
    base_weight = ndimage.distance_transform_bf(base_mask) - 1
    tile_weight = ndimage.distance_transform_bf(tile_mask) - 1
    base_weight *= base_weight > 0
    tile_weight *= tile_weight > 0

    # Where neither image contributes, divide by infinity instead of zero:
    weight_sum = (base_weight + tile_weight)
    weight_sum[weight_sum == 0] = numpy.inf

    # NOTE(review): purpose of this pause is not visible here; presumably it
    # lets earlier output appear before the progress bar starts - confirm.
    time.sleep(0.5)

    for idx in tqdm(range(tot_array.shape[1]), unit='img'):

        # Zero-pad the tile projection at the bottom/right to the total size:
        tile = numpy.pad(array[:, idx, :], ((0, pad_y), (0, pad_x)), mode = 'constant')

        # Move it into place (skipped when there is nothing to shift):
        if x_offset != 0 or y_offset != 0:
            tile = interp.shift(tile, [y_offset, x_offset], order = 1)

        # Weighted average of the existing data and the new tile:
        current = tot_array[:, idx, :]
        tot_array[:, idx, :] = ((base_weight * current) + (tile_weight * tile)) / weight_sum
Example #50
0
def _process_utterance(out_dir, index, wav_path, text):
    """Convert one utterance into a saved audio/mel pair of ``.npy`` files.

    The wav is loaded, optionally rescaled, encoded according to
    ``hparams.input_type`` (mu-law quantized, mu-law, or raw), padded and cut
    so that its length is exactly ``frames * hop_size``, and written to
    ``out_dir`` together with its mel spectrogram.

    Args:
        out_dir: Directory the two ``.npy`` files are written to.
        index: Integer used to number the output files.
        wav_path: Path of the wav file to load.
        text: Transcript; passed through unchanged into the result.

    Returns:
        Tuple ``(audio_filename, mel_filename, timesteps, text)`` where
        ``timesteps`` is the number of saved audio samples.
    """
    signal = audio.load_wav(wav_path)

    if hparams.rescaling:
        signal = signal / np.abs(signal).max() * hparams.rescaling_max

    input_type = hparams.input_type
    if is_mulaw_quantize(input_type):
        # Quantized to [0, quantize_channels)
        encoded = P.mulaw_quantize(signal, hparams.quantize_channels)

        # Silence is trimmed in this branch only, on both the raw and the
        # quantized signal so that they stay aligned.
        first, last = audio.start_and_end_indices(encoded,
                                                  hparams.silence_threshold)
        signal, encoded = signal[first:last], encoded[first:last]
        pad_value = P.mulaw_quantize(0, hparams.quantize_channels)
        save_dtype = np.int16
    elif is_mulaw(input_type):
        # Range [-1, 1]
        encoded = P.mulaw(signal, hparams.quantize_channels)
        pad_value = P.mulaw(0.0, hparams.quantize_channels)
        save_dtype = np.float32
    else:
        # Raw audio, range [-1, 1]
        encoded = signal
        pad_value = 0.0
        save_dtype = np.float32

    # Mel-scale spectrogram of the (possibly trimmed) wav, transposed to (N, D)
    mel = audio.melspectrogram(signal).astype(np.float32).T

    # lws pads zeros internally before performing stft; pad the encoded audio
    # the same way, using the encoding of silence, to adjust the time
    # resolution between audio and mel spectrogram.
    hop = audio.get_hop_size()
    pad_left, pad_right = audio.lws_pad_lr(signal, hparams.fft_size, hop)
    encoded = np.pad(encoded, (pad_left, pad_right),
                     mode="constant", constant_values=pad_value)

    # Cut the audio to a whole number of hops (one hop per mel frame) so that
    # transposed convolution can be used for upsampling.
    n_frames = mel.shape[0]
    usable = n_frames * hop
    assert len(encoded) >= usable
    encoded = encoded[:usable]
    assert len(encoded) % hop == 0

    timesteps = len(encoded)

    # Write both arrays to disk
    audio_filename = 'ljspeech-audio-%05d.npy' % index
    mel_filename = 'ljspeech-mel-%05d.npy' % index
    audio_target = os.path.join(out_dir, audio_filename)
    mel_target = os.path.join(out_dir, mel_filename)
    np.save(audio_target, encoded.astype(save_dtype), allow_pickle=False)
    np.save(mel_target, mel.astype(np.float32), allow_pickle=False)

    # Tuple describing this training example
    return (audio_filename, mel_filename, timesteps, text)
Example #51
0
def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"):
    """Resizes an image keeping the aspect ratio unchanged.

    image: array of shape [height, width] or [height, width, channels].
    min_dim: if provided, resizes the image such that its smaller
        dimension == min_dim
    max_dim: if provided, ensures that the image longest side doesn't
        exceed this value. Required in "square" mode.
    min_scale: if provided, ensure that the image is scaled up by at least
        this percent even if min_dim doesn't require it.
    mode: Resizing mode.
        none: No resizing. Return the image unchanged.
        square: Resize and pad with zeros to get a square image
            of size [max_dim, max_dim].
        pad64: Pads width and height with zeros to make them multiples of 64.
               If min_dim or min_scale are provided, it scales the image up
               before padding. max_dim is ignored in this mode.
               The multiple of 64 is needed to ensure smooth scaling of feature
               maps up and down the 6 levels of the FPN pyramid (2**6=64).
        crop: Picks random crops from the image. First, scales the image based
              on min_dim and min_scale, then picks a random crop of
              size min_dim x min_dim. Can be used in training only.
              max_dim is not used in this mode.

    Returns:
    image: the resized image, in the dtype of the input image
    window: (y1, x1, y2, x2). If max_dim is provided, padding might
        be inserted in the returned image. If so, this window is the
        coordinates of the image part of the full image (excluding
        the padding). The x2, y2 pixels are not included.
    scale: The scale factor used to resize the image
    padding: Padding added to the image [(top, bottom), (left, right), (0, 0)].
        Always has three entries, also for 2-D input images.
    crop: (y, x, height, width) of the random crop in "crop" mode,
        None in every other mode.

    Raises an Exception for an unknown mode.
    """
    # Keep track of image dtype and return results in the same dtype
    image_dtype = image.dtype
    # Default window (y1, x1, y2, x2) and default scale == 1.
    h, w = image.shape[:2]
    window = (0, 0, h, w)
    scale = 1
    padding = [(0, 0), (0, 0), (0, 0)]
    crop = None

    if mode == "none":
        return image, window, scale, padding, crop

    # Scale?
    if min_dim:
        # Scale up but not down
        scale = max(1, min_dim / min(h, w))
    if min_scale and scale < min_scale:
        scale = min_scale

    # Does it exceed max dim?
    if max_dim and mode == "square":
        image_max = max(h, w)
        if round(image_max * scale) > max_dim:
            scale = max_dim / image_max

    # Resize the image only when the scale actually changes it
    if scale != 1:
        image = resize(image, (round(h * scale), round(w * scale)),
                       preserve_range=True)

    # Need padding or cropping?
    if mode == "square":
        # Centre the image inside a max_dim x max_dim canvas; an odd
        # remainder goes to the bottom/right side.
        h, w = image.shape[:2]
        top_pad = (max_dim - h) // 2
        bottom_pad = max_dim - h - top_pad
        left_pad = (max_dim - w) // 2
        right_pad = max_dim - w - left_pad
        padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
        # Only pass as many axis entries as the image has, so that 2-D
        # (grayscale) images can be padded too.
        image = np.pad(image, padding[:image.ndim],
                       mode='constant', constant_values=0)
        window = (top_pad, left_pad, h + top_pad, w + left_pad)
    elif mode == "pad64":
        h, w = image.shape[:2]
        # min_dim is optional in this mode; validate it only when given.
        if min_dim:
            assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
        # Distance from each side length to the next multiple of 64
        # (zero when the side is already a multiple).
        pad_h = (-h) % 64
        pad_w = (-w) % 64
        top_pad = pad_h // 2
        bottom_pad = pad_h - top_pad
        left_pad = pad_w // 2
        right_pad = pad_w - left_pad
        padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
        image = np.pad(image, padding[:image.ndim],
                       mode='constant', constant_values=0)
        window = (top_pad, left_pad, h + top_pad, w + left_pad)
    elif mode == "crop":
        # Pick a random crop
        h, w = image.shape[:2]
        y = random.randint(0, (h - min_dim))
        x = random.randint(0, (w - min_dim))
        crop = (y, x, min_dim, min_dim)
        image = image[y:y + min_dim, x:x + min_dim]
        window = (0, 0, min_dim, min_dim)
    else:
        raise Exception("Mode {} not supported".format(mode))
    return image.astype(image_dtype), window, scale, padding, crop
Example #52
0
    def complete(self, comppars, X, parfile=None, sigma=0.0):
        """
        Finds the best representation that can complete any missing
        part of the ASPA code.

        Input: any spectrum correctly converted into an ASPA code
        """
        outDir = directory(comppars['outDir'])
        maskType = comppars['maskType']
        centerScale = comppars['centerScale']
        nIter = int(comppars['nIter'])
        outInterval = int(comppars['outInterval'])
        approach = comppars['approach']
        make_corner = bool(comppars['make_corner'])

        beta1 = comppars['beta1']
        beta2 = comppars['beta2']
        lr = comppars['lr']
        eps = comppars['eps']

        hmcL = int(comppars['hmcL'])
        hmcEps = comppars['hmcEps']
        hmcBeta = comppars['hmcBeta']
        hmcAnneal = comppars['hmcAnneal']
        checkpoint_dir = directory(comppars['checkpointDir'])

        build_directories(comppars)

        if type(X) == dict:
            X_to_split = copy.deepcopy(X)
        elif type(X) == str:
            if X[-3:] == 'dat':
                X_to_split = str(X)
        else:
            X_to_split = np.array(X)

        try:
            if type(X) != np.ndarray:
                true_spectrum = X
            else:
                true_spectrum = None
        except IOError:
            true_spectrum = None

        with self.sess.as_default():
            try:
                tf.global_variables_initializer().run()
            except:
                tf.initialize_all_variables().run()

        isLoaded = self.load(checkpoint_dir)
        assert (isLoaded)

        grids = Grids()
        wnw_grid = grids.wnw_grid

        nImgs = self.batch_size

        batch_idxs = int(np.ceil(nImgs / self.batch_size))
        if maskType == 'random':
            fraction_masked = 0.2
            mask = np.ones(self.image_shape)
            mask[
                np.random.random(self.image_shape[:2]) < fraction_masked] = 0.0
        elif maskType == 'center':
            assert (centerScale <= 0.5)
            mask = np.ones(self.image_shape)
            l = int(self.image_size * centerScale)
            u = int(self.image_size * (1.0 - centerScale))
            mask[l:u, l:u, :] = 0.0
        elif maskType == 'left':
            mask = np.ones(self.image_shape)
            c = self.image_size // 2
            mask[:, :c, :] = 0.0
        elif maskType == 'full':
            mask = np.ones(self.image_shape)
        elif maskType == 'grid':
            mask = np.zeros(self.image_shape)
            mask[::4, ::4, :] = 1.0
        elif maskType == 'lowres':
            mask = np.zeros(self.image_shape)
        elif maskType == 'parameters':
            assert (centerScale <= 0.5)
            mask = np.ones(self.image_shape)
            mask[-3:, :, :] = 0.0
            mask[:, -3:, :] = 0.0
            mask[-10:, -10:, :] = 0.0
        elif maskType == 'wfc3':
            assert (centerScale <= 0.5)
            m_size = self.image_size - 10
            mask = np.ones(self.image_shape)
            fake_spec = np.ones(m_size**2)
            fake_spec[:334] = 0.0
            fake_spec[384:] = 0.0
            fake_spec = fake_spec.reshape((m_size, m_size))
            mask[:m_size, :m_size, 0] = fake_spec
            mask[-8:, :, :] = 0.0
            mask[:, -10:, :] = 0.0
            mask[-10:, -10:, :] = 0.0
        else:
            assert (False)

        for idx in xrange(0, batch_idxs):
            l = idx * self.batch_size
            u = min((idx + 1) * self.batch_size, nImgs)
            batchSz = u - l
            if type(X) != str:
                Xtrue = get_spectral_matrix(X, size=self.image_size - 10)
                Xt = get_test_image(X,
                                    sigma=sigma,
                                    size=self.image_size,
                                    batch_size=self.batch_size)
            else:
                Xtrue = get_spectral_matrix(X,
                                            parfile=parfile,
                                            size=self.image_size - 10)
                Xt = get_test_image(X,
                                    sigma=sigma,
                                    size=self.image_size,
                                    batch_size=self.batch_size,
                                    parfile=parfile)
            spec_parameters = get_parameters(Xtrue, size=self.image_size)

            batch = Xt
            batch_images = np.array(batch).astype(np.float32)
            if batchSz < self.batch_size:
                print(batchSz)
                padSz = ((0, int(self.batch_size - batchSz)), (0, 0), (0, 0),
                         (0, 0))
                batch_images = np.pad(batch_images, padSz, 'constant')

                batch_images = batch_images.astype(np.float32)

            zhats = np.random.uniform(-1,
                                      1,
                                      size=(self.batch_size, self.z_dim))
            m = 0
            v = 0

            nImgs = 1
            nRows = int(np.sqrt(nImgs))
            nCols = int(np.sqrt(nImgs))
            #      save_images(batch_images[:nImgs, :, :, :], [nRows, nCols],
            #                  os.path.join(config.outDir, 'before.pdf'))
            plt.imsave(os.path.join(outDir, 'before.png'),
                       Xtrue[:, :, 0],
                       cmap='gist_gray',
                       format='png')
            plt.close()
            resize(os.path.join(outDir, 'before.png'))

            masked_images = np.multiply(batch_images, mask)
            #      save_images(masked_images[:nImgs, :, :, :], [nRows, nCols],
            #                  os.path.join(config.outDir, 'masked.pdf'))
            plt.imsave(os.path.join(outDir, 'masked.png'),
                       masked_images[0, :, :, 0],
                       cmap='gist_gray',
                       format='png')
            plt.close()
            resize(os.path.join(outDir, 'masked.png'))

            for img in range(batchSz):
                with open(
                        os.path.join(outDir,
                                     'logs/hats_{:02d}.log'.format(img)),
                        'a') as f:
                    f.write('iter loss ' + ' '.join(
                        ['z{}'.format(zi) for zi in range(self.z_dim)]) + '\n')

            for i in xrange(nIter):

                fd = {
                    self.z: zhats,
                    self.mask: mask,
                    self.images: batch_images,
                    self.is_training: False
                }
                run = [self.complete_loss, self.grad_complete_loss, self.G]
                loss, g, G_imgs = self.sess.run(run, feed_dict=fd)

                for img in range(batchSz):
                    with open(
                            os.path.join(outDir,
                                         'logs/hats_{:02d}.log'.format(img)),
                            'ab') as f:
                        f.write('{} {} '.format(i, loss[img]).encode())
                        np.savetxt(f, zhats[img:img + 1])

                if i % outInterval == 0:
                    prediction_file = open(
                        outDir + 'predictions/prediction_{:04d}.txt'.format(i),
                        'w')

                    ranges = []
                    ground_truths = []
                    gan_avg = []
                    gan_p_err = []
                    gan_m_err = []

                    if type(X) == str:
                        """
                        If the input spectrum is synthetic, you know the parameters array 
                        and you want to compare the real value with the retrieved one, if
                        your spectrum does not contain a molecule, the default value is fixed
                        to -7.9
                        """
                        pp = ParameterParser()
                        pp.read(parfile)
                        real_pars = pp.full_dict()

                        real_tp = float(real_pars['Atmosphere']['tp_iso_temp'])
                        real_rp = float(real_pars['Planet']['radius'])
                        real_mp = float(real_pars['Planet']['mass'])
                        atm_active_gases = np.array([
                            gas.upper()
                            for gas in real_pars['Atmosphere']['active_gases']
                        ])
                        atm_active_gases_mixratios = np.array(
                            real_pars['Atmosphere']['active_gases_mixratios'])
                        real_mol = check_molecule_existence(
                            ['CO', 'CO2', 'H2O', 'CH4'],
                            atm_active_gases_mixratios,
                            atm_active_gases,
                            default=-7.9)
                        ground_truths = np.array(real_mol +
                                                 [real_rp, real_mp, real_tp])

                    elif true_spectrum != None and type(X) != str:
                        h2o = np.log10(true_spectrum['param']['h2o_mixratio'])
                        ch4 = np.log10(true_spectrum['param']['ch4_mixratio'])
                        co2 = np.log10(true_spectrum['param']['co2_mixratio'])
                        co = np.log10(true_spectrum['param']['co_mixratio'])
                        rp = true_spectrum['param']['planet_radius'] / RJUP
                        mp = true_spectrum['param']['planet_mass'] / MJUP
                        tp = true_spectrum['param']['temperature_profile']
                        ground_truths = np.array(
                            [co, co2, h2o, ch4, rp, mp, tp])
                        real_mol = np.zeros(4)
                    else:
                        ground_truths = np.array([None] * 7)
                        real_mol = np.zeros(4)

                    parameters = ['CO', 'CO2', 'H2O', 'CH4', 'Rp', 'Mp', 'Tp']
                    labels = [
                        '$\log{CO}$', '$\log{CO_2}$', '$\log{H_2O}$',
                        '$\log{CH_4}$', '$R_p (R_j)$', '$M_p (M_j)$', '$T_p$'
                    ]

                    all_hists = []
                    for mol in parameters:
                        prediction_file, gan_avg, gan_p_err, gan_m_err, ranges, all_hists = \
                            histogram_par(mol, G_imgs, batchSz, self.image_size,
                                          ground_truths, all_hists,
                                          prediction_file, gan_avg,
                                          gan_p_err, gan_m_err, ranges)

                    all_hists = np.array(all_hists).T

                    if make_corner:
                        make_corner_plot(all_hists, ranges, labels,
                                         ground_truths, comppars, i)
                    """
                    Plot histograms
                    """
                    hist_dict = {}
                    f, ax = plt.subplots(2, 4, figsize=(21, 15))
                    all_hists = all_hists.T
                    ii = 0
                    jj = 0
                    for his in range(len(all_hists)):
                        if his == 4:
                            ii = 1
                            jj = 4
                        hist_dict[labels[his]] = {}
                        weights = np.ones_like(all_hists[his]) / float(
                            len(all_hists[his]))
                        hist_dict[labels[his]]['histogram'] = all_hists[his]
                        hist_dict[labels[his]]['weights'] = weights
                        hist_dict[labels[his]]['bins'] = ranges[his]
                        ax[ii, his - jj].hist(all_hists[his],
                                              bins=np.linspace(
                                                  min(ranges[his]),
                                                  max(ranges[his]), 20),
                                              color='firebrick',
                                              weights=weights)
                        #            ax[his].set_ylim(0, 1)
                        ax[ii, his - jj].set_xlim(min(ranges[his]),
                                                  max(ranges[his]))
                        ax[ii, his - jj].axvline(gan_avg[his],
                                                 c='g',
                                                 label='ExoGAN mean')
                        ax[ii, his - jj].axvline(ground_truths[his],
                                                 c='b',
                                                 label='Input value')
                        ax[ii, his - jj].set_xlabel(labels[his] + \
                                                    ' = $%1.2f_{-%1.2f}^{%1.2f}$' % (
                                                    gan_avg[his], gan_m_err[his], gan_p_err[his]))
                        if his == 3:
                            ax[ii, his - jj].legend()
                        #            ax[his].annotate('$%1.2f_{-%1.2f}^{%1.2f}$' % (gan_avg[his], gan_p_err[his], gan_m_err[his]),
                        #               bbox=dict(boxstyle="round4", fc="w", alpha=0.5),
                        #               xy=(gan_avg[his], max(weights)*(0.9)),
                        #               xycoords='data')
                        ax[ii, his - jj].axvline(gan_avg[his] + gan_p_err[his],
                                                 c='k',
                                                 linestyle='--')
                        ax[ii, his - jj].axvline(gan_avg[his] - gan_m_err[his],
                                                 c='k',
                                                 linestyle='--')
                    ax[-1, -1].axis('off')
                    plt.subplots_adjust(right=1.2)

                    histName = os.path.join(
                        outDir, 'histograms/all_par/{:04d}.pdf'.format(i))
                    plt.savefig(histName, bbox_inches='tight')
                    plt.close()
                    histpickle = os.path.join(
                        outDir, 'histograms/all_par/histogram.pickle')
                    with open(histpickle, 'wb') as fp:
                        pickle.dump(hist_dict, fp)

                    real_spec = Xtrue[:self.image_size, :self.image_size, :]
                    real_spec = real_spec[:23, :23, 0].flatten()

                    chi_square = []
                    spectra = []
                    f, ax = plt.subplots(sharey=True, figsize=(12, 6))
                    for k in range(batchSz):
                        spectrum = G_imgs[
                            k, :self.image_size, :self.image_size, :]
                        spectrum = spectrum[:23, :23, 0].flatten()
                        spectra.append(spectrum)
                        chi_square.append(
                            chisquare(spectrum[:440],
                                      f_exp=real_spec[:440])[0])
                    best_ind = chi_square.index(min(chi_square))

                    print(i, np.mean(loss[0:batchSz]))
                    imgName = os.path.join(outDir,
                                           'hats_imgs/{:04d}.png'.format(i))

                    #          save_images(G_imgs[:nImgs, :, :, :], [nRows, nCols], imgName)
                    plt.imsave(imgName,
                               G_imgs[best_ind, :, :, 0],
                               cmap='gist_gray',
                               format='png')
                    plt.close()
                    resize(imgName)

                    inv_masked_hat_images = np.multiply(G_imgs, 1.0 - mask)
                    completed = masked_images + inv_masked_hat_images
                    imgName = os.path.join(outDir,
                                           'completed/{:04d}.png'.format(i))
                    #          save_images(completed[:nImgs, :, :, :], [nRows, nCols], imgName)
                    plt.imsave(imgName,
                               completed[best_ind, :, :, 0],
                               cmap='gist_gray',
                               format='png')
                    plt.close()
                    resize(imgName)

                    if spectra_int_norm:
                        # Compared real spectrum with the generated one
                        spectra_int_norm(Xtrue, self.image_size, wnw_grid,
                                         batchSz, G_imgs, comppars, i)

                    if spectra_norm:
                        # Compare spectra with original normalisation between 0 and 1
                        spectra_norm(Xtrue, self.image_size, wnw_grid, batchSz,
                                     G_imgs, comppars, i)

                    if spectra_real_norm:
                        # Compare spectra with the normalisation factor from the real spectrum
                        spectra_real_norm(Xtrue, self.image_size, wnw_grid,
                                          batchSz, G_imgs, comppars, i)

                if approach == 'adam':
                    # Optimize single completion with Adam
                    m_prev = np.copy(m)
                    v_prev = np.copy(v)
                    m = beta1 * m_prev + (1 - beta1) * g[0]
                    v = beta2 * v_prev + (1 - beta2) * np.multiply(g[0], g[0])
                    m_hat = m / (1 - beta1**(i + 1))
                    v_hat = v / (1 - beta2**(i + 1))
                    zhats += -np.true_divide(lr * m_hat,
                                             (np.sqrt(v_hat) + eps))
                    zhats = np.clip(zhats, -1, 1)

                elif approach == 'hmc':
                    # Sample example completions with HMC (not in paper)
                    zhats_old = np.copy(zhats)
                    loss_old = np.copy(loss)
                    v = np.random.randn(self.batch_size, self.z_dim)
                    v_old = np.copy(v)

                    for steps in range(hmcL):
                        v -= hmcEps / 2 * hmcBeta * g[0]
                        zhats += hmcEps * v
                        np.copyto(zhats, np.clip(zhats, -1, 1))
                        loss, g, _, _ = self.sess.run(run, feed_dict=fd)
                        v -= hmcEps / 2 * hmcBeta * g[0]

                    for img in range(batchSz):
                        logprob_old = hmcBeta * loss_old[img] + np.sum(
                            v_old[img]**2) / 2
                        logprob = hmcBeta * loss[img] + np.sum(v[img]**2) / 2
                        accept = np.exp(logprob_old - logprob)
                        if accept < 1 and np.random.uniform() > accept:
                            np.copyto(zhats[img], zhats_old[img])

                    hmcBeta *= hmcAnneal

                else:
                    assert (False)
    for each in original_features:
        sm = []
        for beach in cover_features:
            sm.append(euclidean(each, beach))
        similarity_matrix.append(sm)
    return np.array(similarity_matrix)

# Usage: <script> <model file> <original song> <cover song>
# Scores whether the second audio file is a cover of the first one.
model = load_model(sys.argv[1])

original_song = sys.argv[2]
cover_song = sys.argv[3]

# librosa.load returns a tuple; only its first element is used here.
original_signal = librosa.load(original_song)[0]
cover_signal = librosa.load(cover_song)[0]

original_features = extract_features(original_signal)
cover_features = extract_features(cover_signal)

# Run the cover features through oti_func against the original before
# building the similarity matrix.
oti_cover_features = oti_func(original_features, cover_features)
mat = sim_matrix(original_features, oti_cover_features)

# The network input is fixed at 180 x 180: zero-pad the trailing edge of any
# axis that is shorter than that.
rows_short = max(0, 180 - mat.shape[0])
cols_short = max(0, 180 - mat.shape[1])
if rows_short or cols_short:
    mat = np.pad(mat,
                 ((0, rows_short), (0, cols_short)),
                 mode='constant',
                 constant_values=0)

ans = model.predict(mat.reshape(1, 180, 180, 1))
not_cover_prob = ans[0][0]
cover_prob = ans[0][1]
if cover_prob > not_cover_prob:
    print("The song is a cover pair with probability of : {}".format(cover_prob))
else:
    print("The song is not a cover pair with probability of : {}".format(not_cover_prob))
Example #54
0
def coarseness(image, voxelspacing = None, mask = slice(None)):
    r"""
    Takes a simple or multi-spectral image and returns the coarseness of the texture.

    Step1  At each pixel, compute six averages for the windows of size 2**k x 2**k,
            k=0,1,...,5, around the pixel.
    Step2  At each pixel, compute absolute differences E between the pairs of non
            overlapping averages in every directions.
    Step3  At each pixel, find the value of k that maximises the difference Ek in either
            direction and set the best size Sbest=2**k
    Step4  Compute the coarseness feature Fcrs by averaging Sbest over the entire image.

    Parameters
    ----------
    image : array_like or list/tuple of array_like
        A single image or a list/tuple of images (for multi-spectral case).
    voxelspacing : sequence of floats
        The side-length of each voxel. Defaults to 1 along every dimension of
        the (masked) image.
    mask : array_like
        A binary mask for the image, a slice object, or a sequence of slice
        objects. A binary mask is converted to a boolean array before it is
        applied.

    Returns
    -------
    coarseness : float
        The size of coarseness of the given texture. It is basically the size of
        repeating elements in the image. ``None`` is returned (after printing a
        message) when ``voxelspacing`` does not have one entry per dimension of
        the masked image.
    """
    image = numpy.asarray(image, dtype=numpy.float32)

    # Apply the mask. Slices (single or one per axis) are used as they are;
    # anything else is treated as a binary mask and must be boolean, otherwise
    # numpy would interpret 0/1 entries as integer indices.
    if not isinstance(mask, slice):
        if isinstance(mask[0], slice):
            mask = tuple(mask)
        else:
            mask = numpy.asarray(mask, dtype=bool)
    image = image[mask]

    # Set default voxel spacing if not supplied. An identity check is required:
    # ``None == voxelspacing`` is evaluated elementwise for array input.
    if voxelspacing is None:
        voxelspacing = tuple([1.] * image.ndim)

    if len(voxelspacing) != image.ndim:
        print("Voxel spacing and image dimensions do not fit.")
        return None

    # Step1: pad only the leading side of every axis by the largest window
    # (2**5 voxels). Pad widths and slice bounds have to be real integers.
    padSize = tuple((int(numpy.rint((2**5.0) * voxelspacing[jj])), 0)
                    for jj in range(image.ndim))
    Apad = numpy.pad(image, pad_width=padSize, mode='reflect')

    # E[k, d] holds the absolute difference for window index k along axis d.
    E = numpy.empty((6, image.ndim) + image.shape)

    # Prepare some slicers.
    rawSlicer = [slice(None)] * image.ndim
    slicerForImageInPad = [slice(padSize[d][0], None) for d in range(image.ndim)]

    for k in range(6):
        size_vs = tuple(int(numpy.rint((2**k) * voxelspacing[jj]))
                        for jj in range(image.ndim))
        A = uniform_filter(Apad, size=size_vs, mode='mirror')

        # Step2: At each pixel, compute absolute differences E(x,y) between
        # the pairs of non overlapping averages along every axis.
        for d in range(image.ndim):
            borders = size_vs[d]

            # Keep `borders` extra padded voxels in front of the image along
            # axis d so that the two shifted views below have the image shape.
            slicerPad_k_d = slicerForImageInPad[:]
            slicerPad_k_d[d] = slice(
                (padSize[d][0] - borders if borders < padSize[d][0] else 0),
                None)
            A_k_d = A[tuple(slicerPad_k_d)]

            AslicerL = rawSlicer[:]
            AslicerL[d] = slice(0, -borders)

            AslicerR = rawSlicer[:]
            AslicerR[d] = slice(borders, None)

            E[k, d, ...] = numpy.abs(A_k_d[tuple(AslicerL)] - A_k_d[tuple(AslicerR)])

    # Step3: At each pixel, find the value of k that maximises the difference
    # Ek(x,y) in either direction and set the best size Sbest(x,y)=2**k, scaled
    # by the voxel spacing of the axis that produced the maximum at that k.
    k_max = E.max(1).argmax(0)
    dim = E.argmax(1)
    dim_best = numpy.take_along_axis(dim, k_max[numpy.newaxis, ...], axis=0)[0]
    dim_vox_space = numpy.asarray(voxelspacing)[dim_best]
    S = (2**k_max) * dim_vox_space

    # Step4: Compute the coarseness feature Fcrs by averaging Sbest(x,y) over
    # the entire image.
    return S.mean()
def pad_nd_img(image,
               new_shape=None,
               mode="edge",
               kwargs=None,
               return_slicer=False,
               shape_must_be_divisible_by=None):
    """
    Pad the trailing axes of ``image`` up to a target shape.

    The padding is split evenly between both sides of each axis; when the
    amount is odd the extra element goes to the upper side. An axis is never
    cropped: if the requested size is smaller than the current size, the
    current size is kept.

    :param image: array to pad.
    :param new_shape: minimum shape of the trailing ``len(new_shape)`` axes.
        May be None, in which case ``shape_must_be_divisible_by`` has to be a
        list, tuple or ndarray and the current trailing shape is used.
    :param mode: padding mode handed to ``np.pad``.
    :param kwargs: extra keyword arguments handed to ``np.pad``.
    :param return_slicer: if true, also return a list of slices that selects
        the original data inside the padded result.
    :param shape_must_be_divisible_by: scalar or per-axis sequence; each padded
        axis is enlarged to the next multiple of its entry.
    :returns: the padded array, or ``(padded, slicer)`` if ``return_slicer``.
    """
    pad_kwargs = {} if kwargs is None else kwargs
    divisors = shape_must_be_divisible_by

    if new_shape is None:
        # Without an explicit target only the divisibility constraint applies.
        assert divisors is not None
        assert isinstance(divisors, (list, tuple, np.ndarray))
        new_shape = image.shape[-len(divisors):]
        old_shape = new_shape
    else:
        old_shape = np.array(image.shape[-len(new_shape):])

    leading_axes = len(image.shape) - len(new_shape)

    # Never shrink an axis.
    target = np.array([
        max(wanted, old_shape[axis]) for axis, wanted in enumerate(new_shape)
    ])

    if divisors is not None:
        if isinstance(divisors, (list, tuple, np.ndarray)):
            assert len(divisors) == len(target)
        else:
            divisors = [divisors] * len(target)

        # Step exact multiples down by one divisor first so the round-up
        # below returns them unchanged instead of adding a full divisor.
        for axis in range(len(target)):
            if target[axis] % divisors[axis] == 0:
                target[axis] -= divisors[axis]

        target = np.array([
            target[axis] + divisors[axis] - target[axis] % divisors[axis]
            for axis in range(len(target))
        ])

    missing = target - old_shape
    lower = missing // 2
    upper = missing // 2 + missing % 2
    widths = [[0, 0] for _ in range(leading_axes)]
    widths.extend([lo, hi] for lo, hi in zip(lower, upper))

    padded = np.pad(image, widths, mode, **pad_kwargs)
    if return_slicer:
        bounds = np.array(widths)
        # Turn the "pad above" amounts into exclusive stop indices.
        bounds[:, 1] = np.array(padded.shape) - bounds[:, 1]
        return padded, [slice(*pair) for pair in bounds]
    return padded
Example #56
0
def _get_compiled_theano_functions(N_QUAD_PTS):
    """Build the symbolic Hamiltonian and compile the Theano functions for it.

    The total Hamiltonian is ``Hkep + eps * Hpert_av``, where ``Hpert_av`` is
    the perturbation averaged over the angle ``psi`` with a Gauss-Legendre
    quadrature rule of ``N_QUAD_PTS`` nodes.

    Every compiled function takes the inputs ``[dyvars, m1, m2, j, k]``:
    ``dyvars`` is a vector unpacked as
    ``(s1, s2, phi, I1, I2, Phi, dRtilde)``, ``m1``/``m2`` are double scalars
    and ``j``/``k`` are integer scalars. The quadrature nodes and weights are
    substituted through ``givens`` and are not runtime inputs.

    Args:
        N_QUAD_PTS: number of quadrature nodes passed to
            ``np.polynomial.legendre.leggauss``.

    Returns:
        dict mapping the names ``'orbital_elements'``, ``'actions'``,
        ``'Rtilde'``, ``'Hamiltonian'``, ``'Hpert'``, ``'Hpert_components'``,
        ``'Hamiltonian_flow'`` and ``'Hamiltonian_flow_jacobian'`` to the
        corresponding compiled Theano functions.
    """
    # Planet masses: m1,m2
    m1, m2 = T.dscalars(2)
    mstar = 1
    mu1 = m1 * mstar / (mstar + m1)
    mu2 = m2 * mstar / (mstar + m2)
    eta1 = mstar + m1
    eta2 = mstar + m2
    beta1 = mu1 * T.sqrt(eta1 / mstar) / (mu1 + mu2)
    beta2 = mu2 * T.sqrt(eta2 / mstar) / (mu1 + mu2)
    j, k = T.lscalars('jk')
    s = (j - k) / k

    # Angle variable for averaging over
    psi = T.dvector('psi')

    # Quadrature weights
    quad_weights = T.dvector('w')

    # Dynamical variables:
    Ndof = 3
    Nconst = 1
    dyvars = T.vector()
    # Unpack the 2 * Ndof + Nconst = 7 components of the state vector.
    s1, s2, phi, I1, I2, Phi, dRtilde = [
        dyvars[i] for i in range(2 * Ndof + Nconst)
    ]

    a20 = T.constant(1.)
    a10 = ((j - k) / j)**(2 / 3) * (eta1 / eta2)**(1 / 3) * a20
    L10 = beta1 * T.sqrt(a10)
    L20 = beta2 * T.sqrt(a20)
    Psi = s * L20 + (1 + s) * L10
    Rtilde = dRtilde - L10 - L20
    ####
    # angles
    ####
    rtilde = T.constant(0.)
    Omega = -1 * rtilde
    # l1 and l2 depend on the vector psi, so they (and everything derived
    # from them) carry one entry per quadrature node.
    l1 = phi + k * (1 + s) * psi + Omega
    l2 = phi + k * s * psi + Omega
    gamma1 = s1 - phi - Omega
    gamma2 = s2 - phi - Omega
    q1 = 0.5 * np.pi - Omega
    q2 = -0.5 * np.pi - Omega

    pomega1 = -1 * gamma1
    pomega2 = -1 * gamma2
    Omega1 = -1 * q1
    Omega2 = -1 * q2
    omega1 = pomega1 - Omega1
    omega2 = pomega2 - Omega2

    ###
    # actions
    ###
    Gamma1 = I1
    Gamma2 = I2
    L1 = Psi / k - s * (I1 + I2) - s * Phi
    L2 = -1 * Psi / k + (1 + s) * (I1 + I2) + (1 + s) * Phi
    Cz = -1 * Rtilde

    R = L1 + L2 - Gamma1 - Gamma2 - Cz
    G1 = L1 - Gamma1
    G2 = L2 - Gamma2

    r2_by_r1 = (L2 - L1 - Gamma2 + Gamma1) / (L1 + L2 - Gamma1 - Gamma2 - R)
    rho1 = 0.5 * R * (1 + r2_by_r1)
    rho2 = 0.5 * R * (1 - r2_by_r1)

    a1 = (L1 / beta1)**2
    e1 = T.sqrt(1 - (1 - (Gamma1 / L1))**2)

    a2 = (L2 / beta2)**2
    e2 = T.sqrt(1 - (1 - (Gamma2 / L2))**2)

    cos_inc1 = 1 - rho1 / G1
    cos_inc2 = 1 - rho2 / G2
    inc1 = T.arccos(cos_inc1)
    inc2 = T.arccos(cos_inc2)

    Hkep = -0.5 * T.sqrt(eta1) * beta1 / a1 - 0.5 * T.sqrt(eta2) * beta2 / a2

    ko = KeplerOp()
    M1 = l1 - pomega1
    M2 = l2 - pomega2
    # Adding zeros_like(M) broadcasts the scalar eccentricity to the shape of
    # the per-node vector M before both are handed to the Kepler op.
    sinf1, cosf1 = ko(M1, e1 + T.zeros_like(M1))
    sinf2, cosf2 = ko(M2, e2 + T.zeros_like(M2))
    #
    n1 = T.sqrt(eta1 / mstar) * a1**(-3 / 2)
    n2 = T.sqrt(eta2 / mstar) * a2**(-3 / 2)
    Hint_dir, Hint_ind, r1, r2, v1, v2 = calc_Hint_components_sinf_cosf(
        a1, a2, e1, e2, inc1, inc2, omega1, omega2, Omega1, Omega2, n1, n2,
        sinf1, cosf1, sinf2, cosf2)
    eps = m1 * m2 / (mu1 + mu2) / T.sqrt(mstar)
    Hpert = (Hint_dir + Hint_ind / mstar)
    # Weighted sum over the quadrature nodes, i.e. the average over psi.
    Hpert_av = Hpert.dot(quad_weights)
    Htot = Hkep + eps * Hpert_av

    #####################################################
    # Set parameters for compiling functions with Theano
    #####################################################

    # Get numerical quadrature nodes and weights
    nodes, weights = np.polynomial.legendre.leggauss(N_QUAD_PTS)

    # Rescale for integration interval from [-1,1] to [-pi,pi]
    nodes = nodes * np.pi
    weights = weights * 0.5

    # 'givens' will fix some parameters of Theano functions compiled below
    givens = [(psi, nodes), (quad_weights, weights)]

    # 'ins' will set the inputs of Theano functions compiled below
    #   Note: 'extra_ins' will be passed as values of object attributes
    #   of the 'ResonanceEquations' class defined below
    extra_ins = [m1, m2, j, k]
    ins = [dyvars] + extra_ins
    orbels = [a1, e1, inc1, k * s1, a2, e2, inc2, k * s2, phi, Omega]
    # NOTE(review): 'orbels' has ten entries but only nine keys are listed, so
    # zip() silently drops the trailing 'Omega' -- confirm this is intended.
    orbels_dict = dict(
        zip([
            'a1', 'e1', 'inc1', 'theta1', 'a2', 'e2', 'inc2', 'theta2', 'phi'
        ], orbels))
    actions = [L1, L2, Gamma1, Gamma2, rho1, rho2]
    # rho1 and rho2 are exposed under the keys 'Q1' and 'Q2'.
    actions_dict = dict(
        zip(['L1', 'L2', 'Gamma1', 'Gamma2', 'Q1', 'Q2'], actions))

    #  Conservative flow
    gradHtot = T.grad(Htot, wrt=dyvars)
    hessHtot = theano.gradient.hessian(Htot, wrt=dyvars)
    # Zero-pad the matrix from _get_Omega_matrix(Ndof) by Nconst trailing
    # rows/columns so it matches the length of dyvars; the padded entries
    # contribute nothing to the flow of the extra component.
    Jtens = T.as_tensor(
        np.pad(_get_Omega_matrix(Ndof), (0, Nconst), 'constant'))
    H_flow_vec = Jtens.dot(gradHtot)
    H_flow_jac = Jtens.dot(hessHtot)

    ##########################
    # Compile Theano functions
    ##########################
    # on_unused_input='ignore' lets every function share the same input list
    # even when its output does not depend on all of the inputs.
    orbels_fn = theano.function(inputs=ins,
                                outputs=orbels_dict,
                                givens=givens,
                                on_unused_input='ignore')
    actions_fn = theano.function(inputs=ins,
                                 outputs=actions_dict,
                                 givens=givens,
                                 on_unused_input='ignore')
    Rtilde_fn = theano.function(inputs=ins,
                                outputs=Rtilde,
                                givens=givens,
                                on_unused_input='ignore')

    Htot_fn = theano.function(inputs=ins,
                              outputs=Htot,
                              givens=givens,
                              on_unused_input='ignore')

    Hpert_fn = theano.function(inputs=ins,
                               outputs=Hpert_av,
                               givens=givens,
                               on_unused_input='ignore')

    Hpert_components_fn = theano.function(
        inputs=ins,
        outputs=[Hint_dir.dot(quad_weights),
                 Hint_ind.dot(quad_weights)],
        givens=givens,
        on_unused_input='ignore')

    H_flow_vec_fn = theano.function(inputs=ins,
                                    outputs=H_flow_vec,
                                    givens=givens,
                                    on_unused_input='ignore')

    H_flow_jac_fn = theano.function(inputs=ins,
                                    outputs=H_flow_jac,
                                    givens=givens,
                                    on_unused_input='ignore')

    return dict({
        'orbital_elements': orbels_fn,
        'actions': actions_fn,
        'Rtilde': Rtilde_fn,
        'Hamiltonian': Htot_fn,
        'Hpert': Hpert_fn,
        'Hpert_components': Hpert_components_fn,
        'Hamiltonian_flow': H_flow_vec_fn,
        'Hamiltonian_flow_jacobian': H_flow_jac_fn
    })
Example #57
0
def compute_pyramid(net, psize, step, interval, image, mean_pixel=None):
    """Compute a pyramid of CNN-derived features for the given image. Similar
    to ``impyra.m``, except we haven't bothered upscaling, since Chen & Yuille
    don't upscale anyway.

    :param net: a ``caffe.Net`` instance corresponding to the fully
        convolutional "deploy" model.
    :param psize: parameter from Chen & Yuille. It's actually ``step * tsize``,
        where ``tsize`` is a kind of "natural" template size computed from the
        dimensions of skeletons in the training set. Unlike Chen & Yuille, we
        use **ROW MAJOR ORDER** for ``psize``! Must support array arithmetic
        (it is used as ``psize - 1`` and ``shape - psize``).
    :param step: yet another parameter from Chen & Yuille. I actually have no
        idea what this corresponds to, intuitively.
    :param interval: how many pyramid levels does it take to halve the data
        size? Also used as the maximum number of levels per forward pass.
    :param image: ``h * w * c`` ``ndarray`` representing a single input image.
    :param mean_pixel: optional mean pixel argument; it is reversed before
        being subtracted. Defaults to 128 for every channel.
    :returns: list of dictionaries with ``output_size``,
        ``{width,height}_pad``, ``scale`` and ``features`` keys. Each entry in
        the list corresponds to a level of the feature pyramid (largest scale
        first). The ``features`` key is an "image" representing the fully
        convolutional network output, where the number of channels in the image
        is equal to the number of softmax outputs in the CNN."""
    assert image.ndim == 3 and image.shape[2] == 3

    if mean_pixel is None:
        mean_pixel = 128 * np.ones((3,))
    else:
        # Flip the mean pixel to BGR
        mean_pixel = mean_pixel[::-1]

    height_pad, width_pad = np.maximum(np.ceil((psize - 1) / 2.0), 0)\
        .astype('int')
    scale = 2 ** (1.0 / interval)
    image_size = np.array(image.shape[:2])
    max_scale = int(1 + np.floor(np.log(np.min(image_size)) / np.log(scale)))
    # This will have keys 'output_size', 'scale', 'height_pad', 'width_pad',
    # 'features'
    rv = [{} for _ in range(max_scale)]

    # A natural size, I guess
    max_batch_size = interval
    for batch_level in range(0, max_scale, max_batch_size):
        # The last batch may hold fewer levels. The builtin min() is required
        # here: np.min(a, b) would interpret b as the reduction axis.
        batch_size = min(max_batch_size, max_scale - batch_level)
        base_dims = image_size / scale ** (batch_level)
        scaled = cf.io.resize(image, base_dims.astype('int'))

        # This next array will be passed to Caffe. It is sized for the first
        # (largest) level of the batch, laid out as batch x channel x rows x
        # cols: rows come from axis 0 of the image and get the height padding,
        # columns come from axis 1 and get the width padding.
        caffe_input = np.zeros((
            batch_size,
            3,
            scaled.shape[0] + 2 * height_pad,
            scaled.shape[1] + 2 * width_pad,
        ))

        for sublevel in range(batch_level, batch_level + batch_size):
            # Pad and add to Caffe input
            pad_dims = (2 * (height_pad,), 2 * (width_pad,), 2 * (0,))
            padded = np.pad(scaled, pad_dims, mode='edge') - mean_pixel
            max_row, max_col = padded.shape[:2]
            # Smaller levels only fill the top-left corner of their slot.
            caffe_input[sublevel - batch_level, :, :max_row, :max_col] = \
                padded.transpose((2, 0, 1))

            # Store metadata
            info = rv[sublevel]
            info['output_size'] = np.floor(
                (padded.shape[:2] - psize) / float(step)
            ).astype('int') + 1
            # NOTE(review): the exponent is ``sublevel - 1`` although sublevel
            # is 0-based here; confirm this is not a leftover from the 1-based
            # indexing of the MATLAB original.
            info['scale'] = step * scale ** (sublevel - 1)
            info['width_pad'] = width_pad / float(step)
            info['height_pad'] = height_pad / float(step)

            # Resize for the next step
            base_dims /= scale
            scaled = cf.io.resize(image, base_dims.astype('int'))

        # To do a fully convolutional forward pass, we just reshape the data
        # layer and let the rest follow
        net.blobs['data'].reshape(*caffe_input.shape)
        net.blobs['data'].data[...] = caffe_input
        # TODO: What does result contain? Apparently it's a dictionary mapping
        # blob names to ndarrays for those blobs. In this case, I guess we'll
        # have a batch_size * softmax_outputs * something * something_else
        # ndarray, where something and something_else will be decided by
        # some annoying arithmetic on strides, pads and steps. Ugh, gross.
        result = net.forward()['prob']

        for sublevel in range(batch_level, batch_level + batch_size):
            info = rv[sublevel]
            max_row, max_col = info['output_size']
            # Crop the network output to the valid region of this level and
            # move the channel axis last.
            info['features'] = result[
                sublevel - batch_level, :, :max_row, :max_col
            ].transpose((1, 2, 0))

    return rv
def MakeNdarray(tensor):
  """Convert a TensorProto into a numpy ndarray.

  The returned array has the shape recorded in the proto and holds its data.

  For example:

  ```python
  # Tensor a has shape (2,3)
  a = tf.constant([[1,2,3],[4,5,6]])
  proto_tensor = tf.make_tensor_proto(a)  # convert `tensor a` to a proto tensor
  tf.make_ndarray(proto_tensor) # output: array([[1, 2, 3],
  #                                              [4, 5, 6]], dtype=int32)
  # output has shape (2,3)
  ```

  Args:
    tensor: A TensorProto.

  Returns:
    A numpy array with the tensor contents.

  Raises:
    TypeError: if tensor has unsupported type.

  """
  dims = [d.size for d in tensor.tensor_shape.dim]
  expected_count = np.prod(dims, dtype=np.int64)
  tf_dtype = dtypes.as_dtype(tensor.dtype)
  np_dtype = tf_dtype.as_numpy_dtype

  # Packed binary content takes precedence over the typed value fields.
  if tensor.tensor_content:
    flat = np.frombuffer(tensor.tensor_content, dtype=np_dtype).copy()
    return flat.reshape(dims)

  if tf_dtype == dtypes.string:
    # np.pad throws on these arrays of type np.object, so strings are padded
    # by hand: repeat the last value (or "" when there is none).
    strings = list(tensor.string_val)
    missing = expected_count - len(strings)
    if missing > 0:
      filler = strings[-1] if strings else ""
      strings.extend([filler] * missing)
    return np.array(strings, dtype=np_dtype).reshape(dims)

  if tf_dtype in (dtypes.float16, dtypes.bfloat16):
    # half_val stores the raw 16-bit patterns; read them as uint16 and then
    # reinterpret the same buffer as the real half-precision dtype.
    values = np.fromiter(tensor.half_val, dtype=np.uint16)
    values.dtype = tf_dtype.as_numpy_dtype
  elif tf_dtype == dtypes.float32:
    values = np.fromiter(tensor.float_val, dtype=np_dtype)
  elif tf_dtype == dtypes.float64:
    values = np.fromiter(tensor.double_val, dtype=np_dtype)
  elif tf_dtype in [
      dtypes.int32, dtypes.uint8, dtypes.uint16, dtypes.int16, dtypes.int8,
      dtypes.qint32, dtypes.quint8, dtypes.qint8, dtypes.qint16, dtypes.quint16
  ]:
    values = np.fromiter(tensor.int_val, dtype=np_dtype)
  elif tf_dtype == dtypes.int64:
    values = np.fromiter(tensor.int64_val, dtype=np_dtype)
  elif tf_dtype == dtypes.complex64:
    # Complex values are stored flat as alternating (real, imag) entries;
    # zipping one iterator with itself consumes them pairwise.
    parts = iter(tensor.scomplex_val)
    values = np.array([complex(re, im) for re, im in zip(parts, parts)],
                      dtype=np_dtype)
  elif tf_dtype == dtypes.complex128:
    parts = iter(tensor.dcomplex_val)
    values = np.array([complex(re, im) for re, im in zip(parts, parts)],
                      dtype=np_dtype)
  elif tf_dtype == dtypes.bool:
    values = np.fromiter(tensor.bool_val, dtype=np_dtype)
  else:
    raise TypeError("Unsupported tensor type: %s" % tensor.dtype)

  # No stored values at all: the result is all zeros.
  if values.size == 0:
    return np.zeros(dims, np_dtype)

  # Fewer values than elements: repeat the last stored value to fill up.
  if values.size != expected_count:
    values = np.pad(values, (0, expected_count - values.size), "edge")

  return values.reshape(dims)
# Load the input picture
image = cv2.imread('test_image.jpg')

# Optional downscaling by a factor of two
#(row_num_im, col_num_im, chan) = image.shape
#image = cv2.resize(image,(int(col_num_im/2), int(row_num_im/2)))

# Work on a float64 grayscale copy
A = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).astype('float64')

# Output matrix, same size as the grayscale image
row_num, col_num = A.shape
B = np.zeros(A.shape)

# One ring of zeros around the image so every pixel has a full neighbourhood
A_padding = np.pad(A, pad_width=1, mode='constant', constant_values=0)

# Sobel kernels for the horizontal (Sx) and vertical (Sy) direction
filt_size = 3
Sx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
Sy = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
for i, j in np.ndindex(row_num, col_num):
    # Because of the one-pixel padding, the 3x3 neighbourhood of image pixel
    # (i, j) starts at (i, j) in the padded array.
    A_block = A_padding[i:i + filt_size, j:j + filt_size]
    # The two kernel responses are simply added together.
    B[i, j] = (A_block * Sx).sum() + (A_block * Sy).sum()

B_min = B.min()
 def pad_x_single(x_single, max_length):
     """Pad ``x_single`` at the end of its first axis up to ``max_length``.

     Only the first axis grows; every other axis is left untouched. The new
     entries are filled with ``pad_value``, which is taken from the enclosing
     scope. ``x_single`` must not already be longer than ``max_length``.
     """
     current_length = x_single.shape[0]
     assert current_length <= max_length
     # One (before, after) pair per axis: grow axis 0, keep the rest as is.
     widths = [(0, max_length - current_length)]
     widths += [(0, 0)] * (x_single.ndim - 1)
     return np.pad(x_single, tuple(widths),
                   mode='constant', constant_values=pad_value)