Example #1
def mmglblshow(X, border=0.0):
    """
        - Purpose
            Apply a random color table to a gray-scale image.
        - Synopsis
            Y = glblshow(X, border=0.0)
        - Input
            X:      Gray-scale (uint8 or uint16) image. Labeled image.
            border: Boolean Default: 0.0. Labeled image.
        - Output
            Y: Gray-scale (uint8 or uint16) or binary image.

    """
    from numpy import take, resize
    from numpy.random import rand

    mmin = X.min()
    mmax = X.max()
    ncolors = mmax - mmin + 1
    # Build one random 8-bit lookup table entry per label value.
    # to_int32 and concat are helpers provided by the surrounding
    # morphology toolbox module (not defined in this snippet).
    R = to_int32(rand(ncolors)*255)
    G = to_int32(rand(ncolors)*255)
    B = to_int32(rand(ncolors)*255)
    if mmin == 0:
        R[0], G[0], B[0] = 0, 0, 0  # keep the background label black
    # Map each pixel's label through the lookup tables with take().
    r = resize(take(R, X.ravel() - mmin), X.shape)
    g = resize(take(G, X.ravel() - mmin), X.shape)
    b = resize(take(B, X.ravel() - mmin), X.shape)
    # Depth-concatenate the three bands into a single color image.
    Y = concat('d', r, g, b)
    return Y
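The core of the function above is indexing a random lookup table with the label values. A minimal, self-contained sketch of the same idea in plain NumPy (the toy label array and LUT below are made up for illustration):

import numpy as np

labels = np.array([[0, 1, 1],
                   [2, 2, 0]])                      # toy labeled image
lut = np.random.randint(0, 256, size=(labels.max() + 1, 3), dtype=np.uint8)
lut[0] = 0                                          # keep label 0 (background) black
rgb = np.take(lut, labels, axis=0)                  # shape (2, 3, 3): one RGB triple per pixel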
Example #2
def _infer_interval_breaks(coord, axis=0, check_monotonic=False):
    """
    >>> _infer_interval_breaks(np.arange(5))
    array([-0.5,  0.5,  1.5,  2.5,  3.5,  4.5])
    >>> _infer_interval_breaks([[0, 1], [3, 4]], axis=1)
    array([[-0.5,  0.5,  1.5],
           [ 2.5,  3.5,  4.5]])
    """
    coord = np.asarray(coord)

    if check_monotonic and not _is_monotonic(coord, axis=axis):
        raise ValueError("The input coordinate is not sorted in increasing "
                         "order along axis %d. This can lead to unexpected "
                         "results. Consider calling the `sortby` method on "
                         "the input DataArray. To plot data with categorical "
                         "axes, consider using the `heatmap` function from "
                         "the `seaborn` statistical plotting library." % axis)

    deltas = 0.5 * np.diff(coord, axis=axis)
    if deltas.size == 0:
        deltas = np.array(0.0)
    first = np.take(coord, [0], axis=axis) - np.take(deltas, [0], axis=axis)
    last = np.take(coord, [-1], axis=axis) + np.take(deltas, [-1], axis=axis)
    trim_last = tuple(slice(None, -1) if n == axis else slice(None)
                      for n in range(coord.ndim))
    return np.concatenate([first, coord[trim_last] + deltas, last], axis=axis)
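For reference, the 1-D case of the doctest above can be re-derived with a few plain NumPy calls; a small sketch without the private helpers:

import numpy as np

centers = np.array([0.0, 1.0, 2.5, 4.0])
deltas = 0.5 * np.diff(centers)
breaks = np.concatenate([[centers[0] - deltas[0]],
                         centers[:-1] + deltas,
                         [centers[-1] + deltas[-1]]])
# breaks -> [-0.5, 0.5, 1.75, 3.25, 4.75]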
Example #3
 def onpick(event):
     ind = event.ind
     for i in ind:
         type = event.artist.get_label()
         msg = ''
         if type == 'Blobs':
             msg = 'Blob ' + str(i)
             for c in range(self.x.shape[1]):
                 msg += '\n  ' + self.cluster_vars[c][:self.cluster_vars[c].find('_mean')]+\
                         ': ' + str(round(np.take(self.x[:,c], i), 2))
             msg += '\n  (All values are z-scores)'
             neighbors = np.where(self.est.labels_ == self.est.labels_[i])[0]
             if len(neighbors) > 1:
                 msg += '\n  Other blobs in cluster: ' + \
                         ', '.join([k for k in neighbors.astype('str') if not k==str(i)]) + \
                         '\n'
         elif type == 'Clusters':
             msg = 'Cluster ' + str(i)
             msg += '\n  Center of cluster (all values in z-scores):'
             for c in range(self.est.cluster_centers_.shape[1]):
                 msg += '\n  ' + self.cluster_vars[c][:self.cluster_vars[c].find('_mean')]+\
                         ': ' + str(round(np.take(self.est.cluster_centers_[:,c], i), 2))
             inhabitants = np.where(self.est.labels_ == i)[0]
             msg += '\n  Blobs in cluster: ' + \
                     ', '.join([k for k in inhabitants.astype('str') if not k==str(i)])+'\n'
         print(msg)
Example #4
    def _set_reach_dist(self, point_index, processed, X, nbrs):
        P = X[point_index:point_index + 1]
        # Assume that radius_neighbors is faster without distances
        # and we don't need all distances, nevertheless, this means
        # we may be doing some work twice.
        indices = nbrs.radius_neighbors(P, radius=self.max_eps,
                                        return_distance=False)[0]

        # Getting indices of neighbors that have not been processed
        unproc = np.compress((~np.take(processed, indices)).ravel(),
                             indices, axis=0)
        # Neighbors of current point are already processed.
        if not unproc.size:
            return

        # Only compute distances to unprocessed neighbors:
        if self.metric == 'precomputed':
            dists = X[point_index, unproc]
        else:
            dists = pairwise_distances(P, np.take(X, unproc, axis=0),
                                       self.metric, n_jobs=None).ravel()

        rdists = np.maximum(dists, self.core_distances_[point_index])
        improved = np.where(rdists < np.take(self.reachability_, unproc))
        self.reachability_[unproc[improved]] = rdists[improved]
        self.predecessor_[unproc[improved]] = point_index
Example #5
def permute_2d(m, p):
    """Performs 2D permutation of matrix m according to p."""
    return m[p][:, p]
    # unused below
    m_t = np.transpose(m)
    r_t = np.take(m_t, p, axis=0)
    return np.take(np.transpose(r_t), p, axis=0)
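A quick usage sketch of permute_2d with a toy matrix and permutation:

import numpy as np

m = np.arange(9).reshape(3, 3)
p = np.array([2, 0, 1])
print(permute_2d(m, p))
# same result as np.take(np.take(m, p, axis=0), p, axis=1)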
    def getAllCurves(self, just_legend=False):
        """
        Ensures that the x-range of the curves
        is strictly monotonically increasing.
        Conserves curves legend and info dictionary.
        """
        curves = Plugin1DBase.Plugin1DBase.getAllCurves(self)
        if just_legend:
            return curves

        processedCurves = []
        for curve in curves:
            x, y, legend, info = curve[0:4]
            xproc = x[:]
            yproc = y[:]
            # Sort
            idx = numpy.argsort(xproc, kind='mergesort')
            xproc = numpy.take(xproc, idx)
            yproc = numpy.take(yproc, idx)
            # Ravel, Increasing
            xproc = xproc.ravel()
            idx = numpy.nonzero((xproc[1:] > xproc[:-1]))[0]
            xproc = numpy.take(xproc, idx)
            yproc = numpy.take(yproc, idx)
            processedCurves += [(xproc, yproc, legend, info)]
        return processedCurves
Example #7
 def gt_topk(dat, axis, ret_typ, k, is_ascend):
     if ret_typ == "indices":
         if is_ascend:
             indices = np.arange(k)
         else:
             indices = np.arange(-1, -k-1, -1)
         ret = np.take(dat.argsort(axis=axis), axis=axis, indices=indices, mode='wrap')
     elif ret_typ == "value":
         if is_ascend:
             indices = np.arange(k)
         else:
             indices = np.arange(-1, -k-1, -1)
         ret = np.take(np.sort(dat, axis=axis), axis=axis, indices=indices, mode='wrap')
     else:
         assert dat.shape == (5, 5, 5, 5)
         assert axis is None or axis ==1
         ret = np.zeros(dat.shape)
         if is_ascend:
             indices = np.arange(k)
         else:
             indices = np.arange(-1, -k-1, -1)
         gt_argsort = np.take(dat.argsort(axis=axis), axis=axis, indices=indices, mode='wrap')
         if axis is None:
             ret.ravel()[gt_argsort] = 1
         else:
             for i in range(5):
                 for j in range(5):
                     for k in range(5):
                         ret[i, gt_argsort[i, :, j, k], j, k] = 1
     return ret
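A short sanity check of gt_topk (random 4-D data matching the shape assumed in the mask branch; values taken along axis 1, descending):

import numpy as np

dat = np.random.rand(5, 5, 5, 5)
top3 = gt_topk(dat, axis=1, ret_typ="value", k=3, is_ascend=False)
assert top3.shape == (5, 3, 5, 5)
assert np.allclose(top3[:, 0], dat.max(axis=1))   # first slice is the per-position maximum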
    def createDescriptorList( self ):
        coordsx = nu.array( [[ self.absx], [self.width], [1], [self.olw],[self.tx_width] ] )
        coordsy = nu.array( [[ self.absy], [self.height], [1], [self.olw],[self.tx_height] ] )

        length = len(self.thePointMatrix)
        pointsx= nu.dot( nu.take(self.thePointMatrix,(0,),1), coordsx )
        pointsy= nu.dot( nu.take(self.thePointMatrix,(1,),1), coordsy )
        points = nu.concatenate( (nu.reshape(pointsx,(length,1)), nu.reshape( pointsy,(length,1) ) ),1 )


        for aShapeName in self.theDescriptorList.keys():
            aDescriptor = self.theDescriptorList[ aShapeName ]
            aSpecific = aDescriptor[SD_SPECIFIC]
            aType = aDescriptor[SD_TYPE ]
            aSpecific[0] = []
            if aType in ( CV_RECT, CV_LINE, CV_ELL, CV_TEXT , CV_IMG):
                for aPointCode in self.theCodeMap[ aShapeName]:
                    x=points[aPointCode][0]
                    y=points[aPointCode][1]
                    aSpecific[0].extend( [x,y])
                if aType == CV_TEXT and aShapeName == "text":
                    aSpecific[SPEC_LABEL] = self.theLabel

            elif aType == CV_BPATH:
                for anArtCode in self.theCodeMap[ aShapeName ]:
                    decodedList = []
                    decodedList.append( anArtCode[0] )
                    for aPointCode in anArtCode[1:]:
                        x=points[aPointCode][0]
                        y=points[aPointCode][1]
                        decodedList.extend([x,y])
                    aSpecific[0].append( tuple(decodedList) )
Example #9
def gather(mask):
    import glob

    # `da` (a netCDF reader in the style of dimarray's read_nc) and the
    # `lats` latitude-band mapping are assumed to be defined at module level.
    flist = glob.glob(mask)
    for f in flist:
        print(f)
        dset = da.read_nc(f, ["csat", "lat", "cloudpts"])
        csat = dset["csat"].values
        lat = dset["lat"].values
        altitude = dset["csat"].altitude
        idx = dset["cloudpts"].values > 0
        del dset
        cpts = dict()
        nprof = dict()
        for l in lats:
            idx1 = np.where((lat >= lats[l][0]) & (lat < lats[l][1]))[0]
            idx2 = np.where((lat >= -lats[l][1]) & (lat < -lats[l][0]))[0]
            idx = np.concatenate([idx1, idx2])
            if l in cpts:
                nprof[l] = nprof[l] + idx.shape[0]
                cpts[l] = cpts[l] + np.take(csat, idx, axis=0).sum(axis=0)
            else:
                nprof[l] = np.sum(idx)
                cpts[l] = np.take(csat, idx, axis=0).sum(axis=0)
    cprofl = dict()
    for l in lats:
        cprofl[l] = 100.0 * cpts[l] / nprof[l]

    return cprofl, altitude
Example #10
    def SNfunc(self,data,sig,significancefloor=0.5):
        D=data.ravel()
        S=sig.ravel()

        args=numpy.argsort(-D/S)
        D=numpy.take(D,args)
        S=numpy.take(S,args)
        Dsum=numpy.cumsum(D)
        Ssum=numpy.cumsum(S**2)**0.5
        SN=(Dsum/Ssum).max()

        #regional SN
        import scipy.ndimage as ndimage
        data[data/sig < significancefloor] = 0
        masks, multiplicity = ndimage.label(data)
        labels=numpy.arange(1, multiplicity+1)
        SNs=numpy.zeros(multiplicity+1)
        SNs[0]=SN
        for i in range(multiplicity):
            D=data[masks==i+1].ravel()
            S=sig[masks==i+1].ravel()
            args=numpy.argsort(-D/S)
            D=numpy.take(D,args)
            S=numpy.take(S,args)
            Dsum=numpy.cumsum(D)
            Ssum=numpy.cumsum(S**2)**0.5
            SNi=(Dsum/Ssum).max()
            SNs[i+1]=SNi
        SNs=-numpy.sort(-SNs)
        return SNs
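The opening block is just a cumulative signal-to-noise maximisation over values sorted by D/S; a standalone sketch of that part with toy arrays:

import numpy as np

data = np.random.rand(16)
sig = np.full(16, 0.1)
order = np.argsort(-data / sig)                   # strongest detections first
D = np.take(data, order)
S = np.take(sig, order)
sn_max = (np.cumsum(D) / np.sqrt(np.cumsum(S**2))).max()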
Example #11
 def convert_to_8_bit(self):
     """
     Convert 16-bit display data to 8-bit using a lookup table.
     """
     if self.intensity_scaling == 'autoscale':
         self.display_min = self.display_data_16.min()
         self.display_max = self.display_data_16.max()
         self._make_linear_lookup_table()
     elif self.intensity_scaling == 'median_filter_autoscale':
         filtered_image = ndimage.filters.median_filter(
             self.display_data_16, size=3, output=self.filtered_image)
         self.display_min = self.filtered_image.min()
         self.display_max = self.filtered_image.max()
         self._make_linear_lookup_table()
     if not hasattr(self, 'display_data_8'):
         self.display_data_8 = np.empty(
             self.buffer_shape[1:], dtype=np.uint8)
     np.take(self.lut, self.display_data_16, out=self.display_data_8)
     try:
         self.display_intensity_scaling_queue.get_nowait()
     except Queue.Empty:
         pass
     self.display_intensity_scaling_queue.put(
         (self.intensity_scaling, self.display_min, self.display_max))
     self.image = ArrayInterfaceImage(self.display_data_8, allow_copy=False)
     pyglet.gl.glTexParameteri( #Reset to no interpolation
             pyglet.gl.GL_TEXTURE_2D,
             pyglet.gl.GL_TEXTURE_MAG_FILTER,
             pyglet.gl.GL_NEAREST)
     if hasattr(self, 'window'):
         if not self.window.visible:
             self.window.set_visible(True)
     return None
Example #12
 def test_take_output(self, level=rlevel):
     """Ensure that 'take' honours output parameter."""
     x = np.arange(12).reshape((3,4))
     a = np.take(x,[0,2],axis=1)
     b = np.zeros_like(a)
     np.take(x,[0,2],axis=1,out=b)
     assert_array_equal(a,b)
Example #13
def is_quadrant_red(quadrant):
     indices = range(0, len(quadrant), 3)
     red_quadrant = np.take(quadrant, indices)
     red_sum = np.sum(red_quadrant)
     red_avg = np.sum(red_quadrant) / len(red_quadrant)

     logging.debug("red avg: %s" % (red_avg))

     indices = range(1, len(quadrant), 3)
     green_quadrant = np.take(quadrant, indices)
     green_sum = np.sum(green_quadrant)
     green_avg = np.sum(green_quadrant) / len(green_quadrant)

     logging.debug("green avg: %s" % (green_avg))

     indices = range(2, len(quadrant), 3)
     blue_quadrant = np.take(quadrant, indices)
     blue_sum = np.sum(blue_quadrant)
     blue_avg = np.sum(blue_quadrant) / len(blue_quadrant)

     logging.debug("blue avg: %s" % (blue_avg))

     is_red = red_avg / (0.5 * (green_avg + blue_avg))
     logging.debug("redcalc: %s" % (is_red))

     if is_red > 2:
         return 1
     else:
         return 0
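The three np.take calls above just pull every third value (R, G, B) out of a flat interleaved pixel buffer, for example:

import numpy as np

quadrant = np.array([200, 10, 20, 180, 30, 15], dtype=float)   # R,G,B,R,G,B
red   = np.take(quadrant, range(0, len(quadrant), 3))          # [200., 180.]
green = np.take(quadrant, range(1, len(quadrant), 3))          # [ 10.,  30.]
blue  = np.take(quadrant, range(2, len(quadrant), 3))          # [ 20.,  15.]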
def continuous_components(delta_X, delta_Y, delta_t, t, T, K):
       
    p = np.arange(K)
    
    delta_xp = np.take(delta_X, p)
    delta_yp = np.take(delta_Y, p)
    delta_tp = np.take(delta_t, p)
    tp = np.take(t, p)
    tp = np.hstack( ( np.array([0]) , tp ) )
    
    first_term_xi = np.cumsum(delta_X[0:K-1])
    second_term_xi = (delta_X[1:K]/delta_t[1:K]) * np.cumsum(delta_t[0:K-1])
    xi = np.hstack( ( np.array([0]), first_term_xi - second_term_xi ) )
    
    first_term_delta = np.cumsum(delta_Y[0:K-1])
    second_term_delta = (delta_Y[1:K]/delta_t[1:K]) * np.cumsum(delta_t[0:K-1])
    delta = np.hstack( ( np.array([0]), first_term_delta - second_term_delta ) )
        
    A0 = (1/T)*np.sum( (delta_xp/(2*delta_tp) * (np.square(tp[1:K+1]) - np.square(tp[0:K]))) + \
                       xi * (tp[1:K+1] - tp[0:K]))

    C0 = (1/T)*np.sum( (delta_yp/(2*delta_tp) * (np.square(tp[1:K+1]) - np.square(tp[0:K]))) + \
                       delta * (tp[1:K+1] - tp[0:K]))
    
    return A0, C0
def reconstruct(efds, T, K):
    
    T = int(np.ceil(T))

    N = len(efds)

    reconstructed = np.zeros((T, 2))
    
    n = np.arange(start=1,stop=N,step=1)
    t = np.arange(T)
    
    n_grid, t_grid = np.meshgrid( n, t )
    
    a_n_grid = np.take(efds[:,0], n_grid)
    b_n_grid = np.take(efds[:,1], n_grid)
    c_n_grid = np.take(efds[:,2], n_grid)
    d_n_grid = np.take(efds[:,3], n_grid)

    arg_grid = n_grid * t_grid / T
    
    cos_term = np.cos( 2 * np.pi * arg_grid )
    sin_term = np.sin( 2 * np.pi * arg_grid )
    
    reconstructed[:,0] = efds[0,0] + np.sum(a_n_grid * cos_term + b_n_grid * sin_term, axis=1)
    reconstructed[:,1] = efds[0,0] + np.sum(c_n_grid * cos_term + d_n_grid * sin_term, axis=1)

    return reconstructed
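The np.take calls in reconstruct gather one descriptor column for every (harmonic, time) pair of the meshgrid; a minimal illustration of that gather pattern with a toy coefficient vector:

import numpy as np

coeffs = np.arange(10.0, 15.0)                 # toy per-harmonic coefficients
n_grid, t_grid = np.meshgrid(np.arange(1, 5), np.arange(3))
gathered = np.take(coeffs, n_grid)             # shape (3, 4): coeffs[n] for every (t, n)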
Example #16
 def transposed(self, new_column_name, select_as_header=None, **kwargs):
     """returns the transposed table.
     
     Arguments:
         - new_column_name: the existing header will become a column with
           this name
         - select_as_header: current column name containing data to be used
           as the header. Defaults to the first column.
     """
     select_as_header = select_as_header or self.Header[0]
     assert select_as_header in self.Header, \
                 '"%s" not in table Header' % select_as_header
     
     raw_data = self.getRawData()
     raw_data.insert(0, self.Header)
     transposed = numpy.array(raw_data, dtype='O')
     transposed = transposed.transpose()
     
     # indices for the header and non header rows
     header_index = self.Header.index(select_as_header)
     
      data_indices = list(range(0, header_index)) + list(range(header_index + 1,
                                                               len(transposed)))
     
     header = list(numpy.take(transposed, [header_index], axis=0)[0])
     header = [new_column_name]+header[1:] # [1:] slice excludes old name
     rows = numpy.take(transposed, data_indices, axis=0)
     return Table(header=header, rows=rows, **kwargs)
Example #17
 def get_MW(self, F, mode='F^-1'):
     if type(F) is dict: # recursive case for many F's at once
         M,W = {}, {}
         for key in F: M[key],W[key] = self.get_MW(F[key], mode=mode)
         return M,W
     modes = ['F^-1', 'F^-1/2', 'I', 'L^-1']; assert(mode in modes)
     if mode == 'F^-1':
         M = np.linalg.pinv(F, rcond=1e-12)
         #U,S,V = np.linalg.svd(F)
         #M = np.einsum('ij,j,jk', V.T, 1./S, U.T)
     elif mode == 'F^-1/2':
         U,S,V = np.linalg.svd(F)
         M = np.einsum('ij,j,jk', V.T, 1./np.sqrt(S), U.T)
     elif mode == 'I':
         M = np.identity(F.shape[0], dtype=F.dtype)
     else:
         #Cholesky decomposition to get M
         order = np.array([10,11,9,12,8,20,0,13,7,14,6,15,5,16,4,17,3,18,2,19,1]) # XXX needs generalizing
         iorder = np.argsort(order)
         F_o = np.take(np.take(F,order, axis=0), order, axis=1)
         L_o = np.linalg.cholesky(F_o)
         U,S,V = np.linalg.svd(L_o.conj())
         M_o = np.dot(np.transpose(V), np.dot(np.diag(1./S), np.transpose(U)))
         M = np.take(np.take(M_o,iorder, axis=0), iorder, axis=1)
     W = np.dot(M, F)
     norm  = W.sum(axis=-1); norm.shape += (1,)
     M /= norm; W = np.dot(M, F)
     return M,W
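The row/column reordering in the Cholesky branch is a common nested np.take idiom; a small standalone check with a toy matrix and an arbitrary order (names are illustrative):

import numpy as np

F = np.arange(16.0).reshape(4, 4)
order = np.array([2, 0, 3, 1])
iorder = np.argsort(order)                                  # inverse permutation
F_o = np.take(np.take(F, order, axis=0), order, axis=1)     # permute rows and columns
back = np.take(np.take(F_o, iorder, axis=0), iorder, axis=1)
assert np.allclose(back, F)                                 # reordering is undone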
Example #18
    def getMonotonicCurves(self):
        """
        Convenience method that calls getAllCurves and makes sure that all of
        the X values are strictly increasing.

        :return: It returns a list of the form:
                [[xvalues0, yvalues0, legend0, dict0],
                 [xvalues1, yvalues1, legend1, dict1],
                 [...],
                 [xvaluesn, yvaluesn, legendn, dictn]]
        """
        allCurves = self.getAllCurves() * 1
        for i in range(len(allCurves)):
            curve = allCurves[i]
            x, y, legend, info = curve[0:4]
            if self.isCurveHidden(legend):
                continue
            # Sort
            idx = argsort(x, kind='mergesort')
            xproc = take(x, idx)
            yproc = take(y, idx)
            # Ravel, Increase
            xproc = xproc.ravel()
            idx = nonzero((xproc[1:] > xproc[:-1]))[0]
            xproc = take(xproc, idx)
            yproc = take(yproc, idx)
            allCurves[i][0:2] = xproc, yproc
        return allCurves
Example #19
 def getColumns(self, columns, **kwargs):
     """Return a slice of columns"""
     # check whether we have integer columns
     
     if isinstance(columns, str):
         columns = [columns]
     
     is_int = min([isinstance(val, int) for val in columns])
     indexes = []
     if is_int:
         indexes = columns
     else:
         indexes = [self.Header.index(head) for head in columns]
     
     if self._row_ids:
         # we disallow reordering of identifiers, and ensure they are only
         # presented once
         for val in range(self._row_ids):
             try:
                 indexes.remove(val)
             except ValueError:
                 pass
          indexes = list(range(self._row_ids)) + indexes
     
     columns = numpy.take(numpy.asarray(self.Header, dtype="O"),
                            indexes)
     new = numpy.take(self.array, indexes, axis=1)
     
     kw = self._get_persistent_attrs()
     kw.update(kwargs)
     return Table(header = columns, rows = new, **kw)
Example #20
def shoelace(vertices):
    """        
    Calculate twice the area of polygon using Shoelace formula.
    
    Polygon is defined by vertices.
    
    Parameters
    ----------
    vertices : array_like
        Vertex coordinates in a 2-D space.
        Coordinates must be placed along the last axis. And data points are
        along the first axis.
    
    Returns
    -------
    area : float
        You can deduce the order of input vertices from the sign:
        area is positive if vertices are in counter-clockwise order.
        area is negative if vertices are in clockwise order.
        area is zero if all points are collinear.
    
    Notes
    -----
    This function can be also used to judge if all points in a data set are
    collinear. Collinear points as input for initializing Polygon instance
    will raise a QhullError.
    
    Examples
    --------
    Vertices of a square:

    Clockwise:    
    
    >>> from tadlib.calfea.polygon import shoelace
    >>> sq = [(0,0), (0,1), (1,1), (1,0)]
    >>> shoelace(sq)
    -2.0
    
    Counter-clockwise:
    
    >>> sq = [(0,0), (1,0), (1,1), (0,1)]
    >>> shoelace(sq)
    2.0
        
    """
    
    vertices = np.asarray(vertices, dtype=float)
    # Rule for stacking multiple comma-separated arrays
    rule = '0,' + str(len(vertices.shape))
    # Shift the array by one position along the first axis
    slip_v = np.r_[rule, vertices[-1], vertices[:-1]]
    # Extract coordinates
    x = np.take(vertices, [0], axis=-1).reshape(vertices.shape[:-1])
    y = np.take(vertices, [1], axis=-1).reshape(vertices.shape[:-1])
    slip_x = np.take(slip_v, [0], axis=-1).reshape(vertices.shape[:-1])
    slip_y = np.take(slip_v, [1], axis=-1).reshape(vertices.shape[:-1])
    # Shoelace formula
    area = np.sum(y * slip_x - x * slip_y, axis=0)
    
    return area
Example #21
def plot_multi(signal, ax=None, **kwargs):
    default_min = float(kwargs.pop('minrange', 0.))
    default_max = float(kwargs.pop('maxrange', 1.))
    plot_range = set_range(signal, default_min, default_max)

    axis_name = kwargs.pop('multi', None)
    kwargs.pop('type', None)
    kwargs.pop('stack', '1,1')
    kwargs.pop('signals', None)
    axes = [getattr(signal, axis) for axis in signal.axes]
    axis_index = signal.axes.index(axis_name)
    multi_axis = axes.pop(axis_index)
    if ax is None:
        ax = plt.subplot(111)
    ax.grid()
    legend = kwargs.pop('legend', False)
    for index, label in enumerate(multi_axis):
        label = '{} = {:.3f} {}'.format(axis_name, label, multi_axis.units)
        data = np.take(signal, index, axis=axis_index)
        plot_axes = [np.take(axis, index, axis=axis.axes.index(axis_name))
                     if axis_name in axis.axes else axis for axis in axes]
        plot_methods[data.ndim](data, *plot_axes, label=label, **kwargs)
    if legend:
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.subplots_adjust(right=0.65)
    plt.show()
    ax.set_ylim(plot_range[0], plot_range[1])
    kwargs['multi'] = axis_name
Example #22
def plot_eig(data,nchan):
    days=data.keys()
    for k in days:
        eig_order=[]
        eigs = []
        eigs_cav = []
        for bl in data[k]:
           c_mat=cov(data[k][bl])
           cav = get_cav(c_mat,nchan,scaling=opts.auto)
           U,S,V= n.linalg.svd(c_mat.conj())
           U_cav,S_cav,V_cav = n.linalg.svd(cav.conj())
           eig_order.append(S[0])
           eigs.append( n.fft.fftshift(n.fft.fft(V.T.conj(),axis=0)))
           eigs_cav.append( n.fft.fftshift(n.fft.fft(V_cav.T.conj(),axis=0)))


        order=n.argsort(eig_order)

        eig_order=n.take(eig_order,order)
        eigs=n.take(eigs,order,axis=0)
        eigs_cav=n.take(eigs_cav,order,axis=0)
        embed()
        fig=p.figure(1)
        for cnt,eig in enumerate(eigs):
            p.plot(eig[0] + cnt*5)
        p.title('Eigenvectors for day {0}'.format(k))
        p.show()
        p.savefig('eigenvectors_{0}.png'.format(k))
        p.clf()
        for cnt,eig in enumerate(eigs_cav):
            p.plot(eig[0] + cnt*5)
        p.title('Eigenvectors of Cav for day {0}'.format(k))
        p.savefig('eigenvectors_cav_{0}.png'.format(k))
        p.clf()
        p.close()
Example #23
    def interpolate(self, points):
        if self.tri is None:
            xc = self.x_coords.flatten()
            yc = self.y_coords.flatten()
            self.no_nan_values = self.values.flatten()

            if np.isnan(xc).any() and np.isnan(yc).any():
                xc = xc[~np.isnan(xc)]
                yc = yc[~np.isnan(yc)]
                self.no_nan_values = self.no_nan_values[~np.isnan(self.no_nan_values)]

            # Default: Qbb Qc Qz 
            self.tri = qhull.Delaunay(np.column_stack((xc, yc)), qhull_options='QbB')

        simplices = self.tri.find_simplex(points)

        indices = np.take(self.tri.simplices, simplices, axis=0)
        transforms = np.take(self.tri.transform, simplices, axis=0)

        delta = points - transforms[:,2]
        bary = np.einsum('njk,nk->nj', transforms[:,:2,:], delta)

        temp = np.hstack((bary, 1-bary.sum(axis=1, keepdims=True)))

        values = np.einsum('nj,nj->n', np.take(self.no_nan_values, indices), temp)

        #print values[np.any(temp<0, axis=1)]

        # This should put a NaN for points outside of any simplices
        # but is for some reason sometimes also true inside a simplex
        #values[np.any(temp < 0.0, axis=1)] = np.nan

        return values
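A self-contained sketch of the same Delaunay/barycentric gather without the class state (random scattered data; variable names are illustrative, and query points outside the hull, where find_simplex returns -1, are not handled):

import numpy as np
from scipy.spatial import Delaunay

pts = np.random.rand(50, 2)
vals = np.sin(pts[:, 0]) + pts[:, 1]
tri = Delaunay(pts)

query = np.random.rand(10, 2) * 0.8 + 0.1          # keep queries well inside [0, 1]^2
simplices = tri.find_simplex(query)
indices = np.take(tri.simplices, simplices, axis=0)
transforms = np.take(tri.transform, simplices, axis=0)
delta = query - transforms[:, 2]
bary = np.einsum('njk,nk->nj', transforms[:, :2, :], delta)
weights = np.hstack((bary, 1 - bary.sum(axis=1, keepdims=True)))
interp = np.einsum('nj,nj->n', np.take(vals, indices), weights)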
Example #24
def _set_reach_dist(core_distances_, reachability_, predecessor_,
                    point_index, processed, X, nbrs, metric, metric_params,
                    p, max_eps):
    P = X[point_index:point_index + 1]
    # Assume that radius_neighbors is faster without distances
    # and we don't need all distances, nevertheless, this means
    # we may be doing some work twice.
    indices = nbrs.radius_neighbors(P, radius=max_eps,
                                    return_distance=False)[0]

    # Getting indices of neighbors that have not been processed
    unproc = np.compress(~np.take(processed, indices), indices)
    # Neighbors of current point are already processed.
    if not unproc.size:
        return

    # Only compute distances to unprocessed neighbors:
    if metric == 'precomputed':
        dists = X[point_index, unproc]
    else:
        _params = dict() if metric_params is None else metric_params.copy()
        if metric == 'minkowski' and 'p' not in _params:
            # the same logic as neighbors, p is ignored if explicitly set
            # in the dict params
            _params['p'] = p
        dists = pairwise_distances(P, np.take(X, unproc, axis=0),
                                   metric, n_jobs=None,
                                   **_params).ravel()

    rdists = np.maximum(dists, core_distances_[point_index])
    improved = np.where(rdists < np.take(reachability_, unproc))
    reachability_[unproc[improved]] = rdists[improved]
    predecessor_[unproc[improved]] = point_index
Example #25
    def _map(self, X):
        """ Maps from a scalar or an array to an RGBA value or array.

        The *X* parameter is either a scalar or an array (of any dimension).
        If it is scalar, the function returns a tuple of RGBA values; otherwise
        it returns an array with the new shape = oldshape+(4,).  Any values
        that are outside the 0,1 interval are clipped to that interval before
        generating RGB values.
        """

        if isinstance(X, (int, float)):
            vtype = 'scalar'
            xa = array([X])
        else:
            vtype = 'array'
            xa = asarray(X)

        # assume the data is properly normalized
        #xa = where(xa>1.,1.,xa)
        #xa = where(xa<0.,0.,xa)


        nanmask = isnan(xa)
        xa = where(nanmask, 0, (xa * (self.steps-1)).astype(int))
        rgba = zeros(xa.shape+(4,), float)
        rgba[...,0] = where(nanmask, 0, take(self._red_lut, xa))
        rgba[...,1] = where(nanmask, 0, take(self._green_lut, xa))
        rgba[...,2] = where(nanmask, 0, take(self._blue_lut, xa))
        rgba[...,3] = where(nanmask, 0, take(self._alpha_lut, xa))
        if vtype == 'scalar':
            rgba = tuple(rgba[0,:])

        return rgba
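A stripped-down, single-channel version of the lookup-table mapping above (hypothetical 256-entry LUT; NaNs mapped to 0 as in the original):

import numpy as np

lut = np.linspace(0.0, 1.0, 256)                   # toy single-channel LUT
x = np.array([[0.0, 0.5], [np.nan, 1.0]])
nanmask = np.isnan(x)
idx = (np.nan_to_num(x) * (lut.size - 1)).astype(int)
mapped = np.where(nanmask, 0.0, np.take(lut, idx))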
Example #26
    def _set_reach_dist(self, point_index, X, nbrs):
        P = np.array(X[point_index]).reshape(1, -1)
        indices = nbrs.radius_neighbors(P, radius=self.max_bound,
                                        return_distance=False)[0]

        # Getting indices of neighbors that have not been processed
        unproc = np.compress((~np.take(self._processed, indices)).ravel(),
                             indices, axis=0)
        # Keep n_jobs = 1 in the following lines...please
        if len(unproc) > 0:
            dists = pairwise_distances(P, np.take(X, unproc, axis=0),
                                       self.metric, n_jobs=1).ravel()

            rdists = np.maximum(dists, self.core_distances_[point_index])
            new_reach = np.minimum(np.take(self.reachability_, unproc), rdists)
            self.reachability_[unproc] = new_reach

        # Checks to see if everything is already processed;
        # if so, return control to main loop
        if unproc.size > 0:
            # Define return order based on reachability distance
            return(unproc[quick_scan(np.take(self.reachability_, unproc),
                                     dists)])
        else:
            return point_index
Example #27
    def test_np_ufuncs(self):
        z = self.create_array(shape=(100, 100), chunks=(10, 10))
        a = np.arange(10000).reshape(100, 100)
        z[:] = a

        eq(np.sum(a), np.sum(z))
        assert_array_equal(np.sum(a, axis=0), np.sum(z, axis=0))
        eq(np.mean(a), np.mean(z))
        assert_array_equal(np.mean(a, axis=1), np.mean(z, axis=1))
        condition = np.random.randint(0, 2, size=100, dtype=bool)
        assert_array_equal(np.compress(condition, a, axis=0),
                           np.compress(condition, z, axis=0))
        indices = np.random.choice(100, size=50, replace=True)
        assert_array_equal(np.take(a, indices, axis=1),
                           np.take(z, indices, axis=1))

        # use zarr array as indices or condition
        zc = self.create_array(shape=condition.shape, dtype=condition.dtype,
                               chunks=10, filters=None)
        zc[:] = condition
        assert_array_equal(np.compress(condition, a, axis=0),
                           np.compress(zc, a, axis=0))
        zi = self.create_array(shape=indices.shape, dtype=indices.dtype,
                               chunks=10, filters=None)
        zi[:] = indices
        # this triggers __array__() call with dtype argument
        assert_array_equal(np.take(a, indices, axis=1),
                           np.take(a, zi, axis=1))
    def __init__(self, x, y, ival=0., sorted=False, side='left'):

        if side.lower() not in ['right', 'left']:
            msg = "side can take the values 'right' or 'left'"
            raise ValueError(msg)
        self.side = side

        _x = np.asarray(x)
        _y = np.asarray(y)

        if _x.shape != _y.shape:
            msg = "x and y do not have the same shape"
            raise ValueError(msg)
        if len(_x.shape) != 1:
            msg = 'x and y must be 1-dimensional'
            raise ValueError(msg)

        self.x = np.r_[-np.inf, _x]
        self.y = np.r_[ival, _y]

        if not sorted:
            asort = np.argsort(self.x)
            self.x = np.take(self.x, asort, 0)
            self.y = np.take(self.y, asort, 0)
        self.n = self.x.shape[0]
Example #29
    def get_left_channels(self, energy, nchan=1):
        self.initialize()
        g_s_ii = self.greenfunction.retarded(energy)
        lambda_l_ii = self.selfenergies[0].get_lambda(energy)
        lambda_r_ii = self.selfenergies[1].get_lambda(energy)

        if self.greenfunction.S is not None:
            s_mm = self.greenfunction.S
            s_s_i, s_s_ii = linalg.eig(s_mm)
            s_s_i = np.abs(s_s_i)
            s_s_sqrt_i = np.sqrt(s_s_i)  # sqrt of eigenvalues
            s_s_sqrt_ii = np.dot(s_s_ii * s_s_sqrt_i, dagger(s_s_ii))
            s_s_isqrt_ii = np.dot(s_s_ii / s_s_sqrt_i, dagger(s_s_ii))

        lambdab_r_ii = np.dot(np.dot(s_s_isqrt_ii, lambda_r_ii), s_s_isqrt_ii)
        a_l_ii = np.dot(np.dot(g_s_ii, lambda_l_ii), dagger(g_s_ii))
        ab_l_ii = np.dot(np.dot(s_s_sqrt_ii, a_l_ii), s_s_sqrt_ii)
        lambda_i, u_ii = linalg.eig(ab_l_ii)
        ut_ii = np.sqrt(lambda_i / (2.0 * np.pi)) * u_ii
        m_ii = 2 * np.pi * np.dot(np.dot(dagger(ut_ii), lambdab_r_ii), ut_ii)
        T_i, c_in = linalg.eig(m_ii)
        T_i = np.abs(T_i)

        channels = np.argsort(-T_i)[:nchan]
        c_in = np.take(c_in, channels, axis=1)
        T_n = np.take(T_i, channels)
        v_in = np.dot(np.dot(s_s_isqrt_ii, ut_ii), c_in)

        return T_n, v_in
Example #30
	def resample(self):
		"resample() randomly draws a set of points equal in size to the original set from the cached data for bootstrapping"
		assert hasattr(self, "saved_xarray"), "resampling not set up yet.  Call setup_resampling() first."
		ranlist=Numeric.floor(self.get_random_list(self.pointcount)*self.pointcount).astype(numeric_int)
		self.xarray=Numeric.take(self.saved_xarray, ranlist, -1) #take columns since vectors lie this way
		self.yarray=Numeric.take(self.saved_yarray, ranlist)
		self.firstpass=1
Example #31
                            & (cham[1] > 4))
ne_southwesterly = np.where((neu[0] > 202.5) & (neu[0] < 247.5) & (neu[1] > 4))
wolf_southwesterly = np.where((wolf[0] > 202.5) & (wolf[0] < 247.5)
                              & (wolf[1] > 4))
#westerly
ch_westerly = np.where((cham[0] > 247.5) & (cham[0] < 292.5) & (cham[1] > 4))
ne_westerly = np.where((neu[0] > 247.5) & (neu[0] < 292.5) & (neu[1] > 4))
wolf_westerly = np.where((wolf[0] > 247.5) & (wolf[0] < 292.5) & (wolf[1] > 4))
#northwesterly
ch_northwesterly = np.where((cham[0] > 292.5) & (cham[0] < 337.5)
                            & (cham[1] > 4))
ne_northwesterly = np.where((neu[0] > 292.5) & (neu[0] < 337.5) & (neu[1] > 4))
wolf_northwesterly = np.where((wolf[0] > 292.5) & (wolf[0] < 337.5)
                              & (wolf[1] > 4))
###############################################################################
chfc_n = np.take(cham[2], ch_northerly)
chwd_n = np.take(cham[0], ch_northerly)
nefc_n = np.take(neu[2], ne_northerly)
newd_n = np.take(neu[0], ne_northerly)
methfc_n = np.take(wolf[2], wolf_northerly)
methwd_n = np.take(wolf[0], wolf_northerly)
methc_n = np.take(wolf[3], wolf_northerly)

chfc_ne = np.take(cham[2], ch_northeasterly)
chwd_ne = np.take(cham[0], ch_northeasterly)
nefc_ne = np.take(neu[2], ne_northeasterly)
newd_ne = np.take(neu[0], ne_northeasterly)
methfc_ne = np.take(wolf[2], wolf_northeasterly)
methwd_ne = np.take(wolf[0], wolf_northeasterly)
methc_ne = np.take(wolf[3], wolf_northeasterly)
 def load_feature(image_idx):
     selected_features = np.take(self.features, image_idx, axis=0)
     return selected_features
Example #33
def plt_mfd(Run_name,mega_MFD, scenarios_names_list, ScL_complet_list, ScL_list, Model_list,BG_hyp_list,
             dimension_used_list,faults_name_list,sample_list,b_value_list,MFD_type_list,m_Mmax,
             mega_bining_in_mag,a_s_model,b_sample,sm_sample,Mt_sample,plot_mfd,plot_as_rep,plot_Mmax,xmin,xmax,ymin,ymax,
             catalog_cum_rate,plot_mfd_detailled,bining_in_mag):
    
    
    file_scenarios_MFD_name = str(Run_name) + '/analysis/txt_files/scenarios_MFD.txt'
    file_scenarios_MFD = open(file_scenarios_MFD_name,'w')
    
    if plot_mfd == True :
        for scenario in scenarios_names_list :
            
            mfds_scenario = []
            for mfd_i in mega_MFD:
                if mfd_i[8] == scenario:
                   mfds_scenario.append(mfd_i)
                   
            mfd_scenario_cumulative = []
            mfd_source_cummulative = []
            for mfd in mfds_scenario:
                mfd_i = mfd[11::].astype(float)
                
                mfd_source_cummulative_i = []
                for i in range(len(mfd_i)): #calculate the cumulative for each source
                    mfd_source_cummulative_i.append(np.sum(np.array(mfd_i)[-(len(mfd_i)-i):])) 
                mfd_source_cummulative.append(mfd_source_cummulative_i)
                
            for sample in sample_list:
                rows, cols = np.where(np.array(mfds_scenario) == sample) 
                
                mfds_scenario_sample = np.take(mfd_source_cummulative,rows,axis= 0)
                
                mfd_scenario_cumulative_sample = np.sum(mfds_scenario_sample,axis=0)
                mfd_scenario_cumulative.append(mfd_scenario_cumulative_sample)
                
                    
                file_scenarios_MFD.write(scenario + '\t' + str(mfd_scenario_cumulative_sample)+'\n')
        
    file_scenarios_MFD.close()
    
    
    #"#### plot for the whole tree
    file_branch_cumMFD_name = str(Run_name) + '/analysis/txt_files/branch_cumMFD.txt'        
    file_branch_cumMFD = open(file_branch_cumMFD_name,'w')
    
    mega_mfd_cummulative = [] #will contain the cumulative MFD for each model of the logic tree
    total_list_BG_hyp = []  #will contain the list of the BG hypotheses for each model of the logic tree
    total_list_complet_ScL = []
    total_list_ScL = [] #will contain the list of the ScL for each model of the logic tree
    total_list_dimension_used = [] #will contain the list of the dimensions used for each model of the logic tree
    total_list_b_value = []
    total_list_MFD_type = []
    total_list_scenario_name = []
    total_list_model = []
    total_list_sample = []
    
    geologic_moment_rate = [] # list of the moment rate of each model
    geologic_moment_rate_no_as = []  # list of the moment rate of each modelif no aseismic slip is considered
    
    selected_ScL = 'Init0'
    Dimention_used = 'Init0' 
    str_all_data = 'Init0' 
    Model = 'Init0' 
    BG_hyp = 'Init0'
    b_min = 'Init0'
    b_max = 'Init0' 
    MFD_type = 'Init0'
    scenario_name = 'Init0'
    sample = 'Init0'
    
    mfd_i = np.zeros(len(mega_MFD[0][11::]))
    
    index = 0
    for mega_mfd_i in mega_MFD :
        if (mega_mfd_i[0] == selected_ScL) and (mega_mfd_i[1] == Dimention_used) and (mega_mfd_i[2] == str_all_data) and (mega_mfd_i[3] == Model
        ) and (mega_mfd_i[4] == BG_hyp) and (mega_mfd_i[5] == b_min) and (mega_mfd_i[6] == b_max) and (mega_mfd_i[7] == MFD_type
        ) and (mega_mfd_i[8] == scenario_name)  and (mega_mfd_i[9] == sample): #same model, we add sources
            #print 'ok'
            mfd_i += mega_mfd_i[11::].astype(float)
            
        else : #it means it a new model
            if sum(mfd_i) != 0. : #we calculate the cumulative MFD
                mfd_cummulative_i = []
                geologic_moment_rate_i = 0.
                for i in range(len(mfd_i)): #calculate the cumulative for each source
                    mfd_cummulative_i.append(np.sum(np.array(mfd_i)[-(len(mfd_i)-i):]))
                    M0 = 10. ** (1.5 * mega_bining_in_mag[i] + 9.1)
                    rate_M0 = M0 * mfd_i[i]
                    geologic_moment_rate_i += rate_M0
                geologic_moment_rate.append(geologic_moment_rate_i)
                geologic_moment_rate_no_as.append(geologic_moment_rate_i * 100. / (100. - float(a_s_model[index])))
                
                mega_mfd_cummulative.append(mfd_cummulative_i)
                total_list_BG_hyp.append(BG_hyp)
                total_list_complet_ScL.append((str(selected_ScL) + '_' + str(Dimention_used) + '_' + str(str_all_data)))
                total_list_ScL.append(selected_ScL)
                total_list_dimension_used.append(Dimention_used)
                total_list_model.append(Model)
                total_list_b_value.append('bmin_'+str(b_min)+'_bmax_'+str(b_max))
                total_list_MFD_type.append(MFD_type)
                total_list_scenario_name.append(scenario_name)
                total_list_sample.append(sample)
                file_branch_cumMFD.write(str(Model) + '\t' + str(MFD_type) + '\t' + str(BG_hyp) + '\t' + str(scenario_name) + '\t' + str((str(selected_ScL) + '_' + str(Dimention_used) + '_' + str(str_all_data))) + '\t' + 'bmin_'+str(b_min)+'_bmax_'+str(b_max) + '\t' + str(sample) + '\t' + '\t'.join(map(str,mfd_cummulative_i)) + '\n')
                
                index += 1
                
            mfd_i = np.zeros(len(mega_mfd_i[11::]))
            selected_ScL = mega_mfd_i[0]
            Dimention_used = mega_mfd_i[1] 
            str_all_data = mega_mfd_i[2] 
            Model = mega_mfd_i[3] 
            BG_hyp = mega_mfd_i[4] 
            b_min = mega_mfd_i[5] 
            b_max = mega_mfd_i[6] 
            MFD_type = mega_mfd_i[7]
            #a_s = mega_mfd_i[8]
            scenario_name = mega_mfd_i[8] 
            sample = mega_mfd_i[9] 
            mfd_i += mega_mfd_i[11::].astype(float)

    #we write for the last model 
    mfd_cummulative_i = []
    geologic_moment_rate_i = 0.
    for i in range(len(mfd_i)): #calculate the cumulative for each source
        mfd_cummulative_i.append(np.sum(np.array(mfd_i)[-(len(mfd_i)-i):]))
        M0 = 10. ** (1.5 * mega_bining_in_mag[i] + 9.1)
        rate_M0 = M0 * mfd_i[i]
        geologic_moment_rate_i += rate_M0
    geologic_moment_rate.append(geologic_moment_rate_i)
    geologic_moment_rate_no_as.append(geologic_moment_rate_i * 100. / (100. - float(a_s_model[index])))       
    geologic_moment_rate.append(geologic_moment_rate_i)
    geologic_moment_rate_no_as.append(geologic_moment_rate_i * 100. / (100. - float(a_s_model[index])))
    
    mega_mfd_cummulative.append(mfd_cummulative_i)
    total_list_BG_hyp.append(BG_hyp)
    total_list_complet_ScL.append((str(selected_ScL) + '_' + str(Dimention_used) + '_' + str(str_all_data)))
    total_list_ScL.append(selected_ScL)
    total_list_dimension_used.append(Dimention_used)
    total_list_model.append(Model)
    total_list_b_value.append('bmin_'+str(b_min)+'_bmax_'+str(b_max))
    total_list_MFD_type.append(MFD_type)
    total_list_scenario_name.append(scenario_name)
    total_list_sample.append(sample)
    file_branch_cumMFD.write(str(Model) + '\t' + str(MFD_type) + '\t' + str(BG_hyp) + '\t' + str(scenario_name) + '\t' + str((str(selected_ScL) + '_' + str(Dimention_used) + '_' + str(str_all_data))) + '\t' + 'bmin_'+str(b_min)+'_bmax_'+str(b_max) + '\t' + str(sample) + '\t' + '\t'.join(map(str,mfd_cummulative_i)) + '\n')
    
    file_branch_cumMFD.close()
    
    
    if len(mega_mfd_cummulative) < 4 :
        plot_mfd = False       
    mfd_X = mega_mfd_cummulative
    for i in range(len(mfd_X)):
        plt.scatter(mega_bining_in_mag,mfd_X[i], c='darkcyan', s=50, edgecolor='',marker = '_',alpha = 0.5)
    axes = plt.gca()
    axes.set_xlim([xmin,xmax])
    axes.set_ylim([ymin,ymax])
    for index_mag in range(len(mega_bining_in_mag)): 
        rate_plus = np.percentile(mfd_X,84,axis=0)[index_mag]
        rate_minus = np.percentile(mfd_X,16,axis=0)[index_mag]
        mag = mega_bining_in_mag[index_mag]
        mag_plus = mag+0.05
        mag_minus = mag-0.05
        verts = [(mag_minus, rate_minus ),
                 (mag_minus, rate_plus),
                 (mag_plus, rate_plus),
                 (mag_plus, rate_minus),
                 (mag_minus, rate_minus)]
        codes = [Path.MOVETO,
                 Path.LINETO,
                 Path.LINETO,
                 Path.LINETO,
                 Path.CLOSEPOLY]
                 
        path_poly = Path(verts, codes)
        
        patch = patches.PathPatch(path_poly,facecolor = 'darkgreen', lw = 0., alpha = 0.15)
        axes.add_patch(patch)
                    
    plt.scatter(mega_bining_in_mag,np.percentile(mfd_X,50,axis=0),
                c='darkgreen', s=25, edgecolor='',marker = 'o',alpha = 0.8)
    plt.scatter(mega_bining_in_mag,np.percentile(mfd_X,16,axis=0),
            c='darkgreen', s=60, edgecolor='',marker = '_',alpha = 0.8)
    plt.scatter(mega_bining_in_mag,np.percentile(mfd_X,84,axis=0),
        c='darkgreen', s=60, edgecolor='',marker = '_',alpha = 0.8)
    plt.plot(mega_bining_in_mag,np.array(mfd_X).mean(axis=0),
                color='darkgreen', linewidth = 2)
    plt.grid()
    
    #plot the MFDs of the wholle tree with mean and percentiles    
#    for i in range(len(mega_mfd_cummulative)):
#        plt.scatter(mega_bining_in_mag,mega_mfd_cummulative[i], c='darkcyan', s=50, edgecolor='',marker = '_',alpha = 0.25)
#        
#    plt.scatter(mega_bining_in_mag,np.percentile(mega_mfd_cummulative,50,axis=0),
#                c='darkgreen', s=30, edgecolor='',marker = 'o',alpha = 0.8)
#    plt.scatter(mega_bining_in_mag,np.percentile(mega_mfd_cummulative,16,axis=0),
#            c='darkgreen', s=20, edgecolor='',marker = '+',alpha = 0.8)
#    plt.scatter(mega_bining_in_mag,np.percentile(mega_mfd_cummulative,84,axis=0),
#        c='darkgreen', s=20, edgecolor='',marker = '+',alpha = 0.8)
#    plt.scatter(mega_bining_in_mag,np.array(mega_mfd_cummulative).mean(axis=0),
#                c='darkslateblue', s=50, edgecolor='',marker = 's',alpha = 0.95) 
#    
#    
#    axes = plt.gca()
#    axes.set_xlim([xmin,xmax])
#    axes.set_ylim([ymin,ymax])
    plt.grid()
    plt.yscale('log')
    plt.title('MFD of the whole tree ')
    plt.savefig(str(Run_name) + '/analysis/figures/mfd/mdf_whole_tree.png' , dpi = 180, transparent=True)
    #plt.show()
    plt.close()
    
        
    rate_in_catalog = catalog_cum_rate

    #bining_in_mag = np.linspace(5.,7.5,26)
    '''##########################################
    #plot mfd for each scenario of the logic tree
    ############################################'''
    if len(scenarios_names_list)>1:
        index_model = 0
        for model in Model_list : 
            rate_in_catalog = catalog_cum_rate[index_model]
            for scenario in scenarios_names_list :
                
                if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/scenario_set/' + scenario):
                    os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/scenario_set/' + scenario)
                rows = np.where(np.array(total_list_scenario_name) == scenario)[0]  
                mfd_X = []
                for index in rows :  
                    mfd = mega_mfd_cummulative[index]
                    mfd_X.append(mfd)
                    
                #density plot
                if plot_mfd == True :   
                    
                    hyp_name = scenario
                    path = str(Run_name) + '/analysis/figures/analyze_branches/scenario_set/' + scenario
                    do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)

            index_model += 1
                
    '''##########################################
    #plot mfd for each model of the logic tree
    ############################################'''
    index_model = 0
    for model in Model_list : 
#        print catalog_cum_rate
#        print 
        rate_in_catalog = catalog_cum_rate[index_model]
        if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model):
            os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model)
        rows = np.where(np.array(total_list_model) == model)[0]  
        mfd_X = []
        for index in rows :  
            mfd = mega_mfd_cummulative[index]
            mfd_X.append(mfd)
            
        #density plot
        if plot_mfd == True :   
            hyp_name = model
            path = str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model
            do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)
        index_model +=1
    '''##########################################
    #plot mfd for each Background hypothesis  of the logic tree
    ############################################'''
    if len(BG_hyp_list) > 1:
        for BG_hyp in BG_hyp_list :        
            if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/BG/' + BG_hyp):
                os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/BG/' + BG_hyp)
            rows = np.where(np.array(total_list_BG_hyp) == BG_hyp)[0]  
            mfd_X = []
            index_check = 0
            for index in rows :  
                mfd = mega_mfd_cummulative[index]
                mfd_X.append(mfd)
                index_check += 1
                
            #density plot
            if plot_mfd == True :   
                hyp_name = BG_hyp
                path = str(Run_name) + '/analysis/figures/analyze_branches/BG/' + BG_hyp
                do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)

                
                                
    '''##########################################
    #plot mfd for each MFD  of the logic tree
    ############################################'''
    if len(MFD_type_list) > 1:
        for MFD_type in MFD_type_list :        
            if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/MFD_type/' + MFD_type):
                os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/MFD_type/' + MFD_type)
            rows = np.where(np.array(total_list_MFD_type) == MFD_type)[0]  
            mfd_X = []
            for index in rows :  
                mfd = mega_mfd_cummulative[index]
                mfd_X.append(mfd)
                
           #density plot
            if plot_mfd == True :  
                hyp_name = MFD_type
                path = str(Run_name) + '/analysis/figures/analyze_branches/MFD_type/' + MFD_type
                do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)

                
    '''##########################################
    #plot mfd for each bvalue  of the logic tree
    ############################################'''
    if len(b_value_list) > 1:
        for b in b_value_list :        
            if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/b_value/' + b):
                os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/b_value/' + b)
            rows = np.where(np.array(total_list_b_value) == b)[0]  
            mfd_X = []
            for index in rows :  
                mfd = mega_mfd_cummulative[index]
                mfd_X.append(mfd)
                
            #density plot
            if plot_mfd == True :   
                hyp_name = b
                path = str(Run_name) + '/analysis/figures/analyze_branches/b_value/' + b
                do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)

                
    '''##########################################
    #plot mfd for scalling law  of the logic tree
    ############################################'''
    if len(ScL_complet_list) > 1:
        for ScL in ScL_complet_list :        
            if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/ScL/' + ScL):
                os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/ScL/' + ScL)
            rows = np.where(np.array(total_list_complet_ScL) == ScL)[0]  
            mfd_X = []
            for index in rows :  
                mfd = mega_mfd_cummulative[index]
                mfd_X.append(mfd)
                
            #density plot
            if plot_mfd == True :   
                hyp_name = ScL
                path = str(Run_name) + '/analysis/figures/analyze_branches/ScL/' + ScL
                do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)

    
#            
    '''######################################
    #plot Mmax for each ScL of the logic tree
    ######################################'''
        
    for ScL in ScL_complet_list :
        rows = np.where(np.array(total_list_complet_ScL) == ScL)[0]
        #mfd_ScL_cumulative = []
        Mmax_m_ScL = []
        for index in rows :
            mfd = mega_mfd_cummulative[index]
            #mfd_ScL_cumulative.append(mfd)
            Mmax_m_ScL.append(m_Mmax[index])
            
        
        if not os.path.exists(str(Run_name) + '/analysis/figures/Mmax/for_each_ScL'):
            os.makedirs(str(Run_name) + '/analysis/figures/Mmax/for_each_ScL')
        
        if plot_Mmax == True :
            plt.hist(Mmax_m_ScL,int(round(max(m_Mmax) - min(m_Mmax),1) * 10. + 1.))
            plt.title(ScL)
            plt.savefig(str(Run_name) + '/analysis/figures/Mmax/for_each_ScL/Hist_Mmax_' + ScL +'.png',dpi = 100)
            #plt.show()
            plt.close()

#            
    '''######################################
    #plot Mmax for each scenario set of the logic tree
    ######################################'''
        
    for Sc_set in scenarios_names_list :
        rows = np.where(np.array(total_list_scenario_name) == Sc_set)[0]
        #mfd_Sc_set_cumulative = []
        Mmax_m_Sc_set = []
        for index in rows :
            mfd = mega_mfd_cummulative[index]
            #mfd_Sc_set_cumulative.append(mfd)
            Mmax_m_Sc_set.append(m_Mmax[index])
            
        
        if not os.path.exists(str(Run_name) + '/analysis/figures/Mmax/for_each_scenario_set'):
            os.makedirs(str(Run_name) + '/analysis/figures/Mmax/for_each_scenario_set')
        
        if plot_Mmax == True :
            plt.hist(Mmax_m_Sc_set,int(round(max(m_Mmax) - min(m_Mmax),1) * 10. + 1.))
            plt.title(Sc_set)
            plt.savefig(str(Run_name) + '/analysis/figures/Mmax/for_each_scenario_set/Hist_Mmax_' + Sc_set +'.png',dpi = 100)
            #plt.show()
            plt.close()
#
##            
#    '''######################################
#    # the magnitude of rupture in which each faults are involed, for each set of scenarios
#           work in kinda progress
#    ######################################'''
#
#    for fault in faults_name_list:
#        for Sc_set in scenarios_names_list :
#            rows = np.where(np.array(mega_MFD) == Sc_set)[0]
#            #mfd_Sc_set_cumulative = []
#            Mmax_m_Sc_set = []
#            for index in rows :
#                mfd = mega_mfd_cummulative[index]
#                Mmax_m_Sc_set.append(m_Mmax[index])
#                
#            
#            if not os.path.exists(str(Run_name) + '/analysis/figures/Mmax/for_each_scenario_set'):
#                os.makedirs(str(Run_name) + '/analysis/figures/Mmax/for_each_scenario_set')
#            
#            if plot_Mmax == True :
#                plt.hist(Mmax_m_Sc_set,int(round(max(m_Mmax) - min(m_Mmax),1) * 10. + 1.))
#                plt.title(Sc_set)
#                plt.savefig(str(Run_name) + '/analysis/figures/Mmax/for_each_scenario_set/Hist_Mmax_' + Sc_set +'.png',dpi = 100)
#                #plt.show()
#                plt.close()
#    
    '''######################################
    #########################################
    #    detailed plots for combinations of
    #       hypotheses
    #########################################
    ######################################'''    

    '''##########################################
    # calculate the difference between the mean rate of the model and the mean rate of the catalog
    ############################################'''        
          
    if plot_mfd == True and plot_mfd_detailled == True:
        file_branch_to_catalog_name = str(Run_name) + '/analysis/txt_files/branch_vs_catalog.txt'
        file_branch_to_catalog = open(file_branch_to_catalog_name,'w')
        index_model = 0
        for model in Model_list : 
            rate_in_catalog = catalog_cum_rate[index_model]
            for MFD_type in MFD_type_list :  
                for scenario in scenarios_names_list :
                    for b_value in b_value_list :  
                        for BG_hyp in BG_hyp_list :  
                            for ScL in ScL_complet_list :  
                                rows_model = np.where(np.array(total_list_model) == model)[0]  
                                rows_mfd = np.where(np.array(total_list_MFD_type) == MFD_type)[0] 
                                rows_sc = np.where(np.array(total_list_scenario_name) == scenario)[0]  
                                rows_ScL = np.where(np.array(total_list_complet_ScL) == ScL)[0]  
                                rows_b = np.where(np.array(total_list_b_value) == b_value)[0]  
                                rows_bg = np.where(np.array(total_list_BG_hyp) == BG_hyp)[0]  
                                rows = list(set(rows_model).intersection(rows_mfd)) 
                                rows = list(set(rows).intersection(rows_sc)) 
                                rows = list(set(rows).intersection(rows_ScL)) 
                                rows = list(set(rows).intersection(rows_b)) 
                                rows = list(set(rows).intersection(rows_bg)) 
                                if len(rows) > 0:
                                    file_branch_to_catalog.write(str(model)+'\t')
                                    file_branch_to_catalog.write(str(MFD_type)+'\t')
                                    file_branch_to_catalog.write(str(scenario)+'\t')
                                    file_branch_to_catalog.write(str(b_value)+'\t')
                                    file_branch_to_catalog.write(str(BG_hyp)+'\t')
                                    file_branch_to_catalog.write(str(ScL)+'\t')
                                    mfd_X = []
                                    for index in rows :  
                                        mfd = mega_mfd_cummulative[index]
                                        mfd_X.append(mfd)
                                    mean_rate_model = np.array(mfd_X).mean(axis=0)
                                    mean_rate_catalog = np.array(rate_in_catalog)#.mean(axis=0)
                                    for i in range(len(mean_rate_catalog)):
                                        file_branch_to_catalog.write(str(mean_rate_model[i]/mean_rate_catalog[i]-1.)+'\t')
                                    file_branch_to_catalog.write('\n')
            index_model +=1
        file_branch_to_catalog.close()
        
    '''##########################################
    # calculate the difference between the mean rate of the model and the mean rate of the catalog
    ############################################'''        
          
    if plot_mfd == True and plot_mfd_detailled == True:
        file_branch_to_catalog_name = str(Run_name) + '/analysis/txt_files/branch_vs_catalog.txt'
        file_branch_to_catalog = open(file_branch_to_catalog_name,'w')
        
        index_model = 0
        for model in Model_list : 
            rate_in_catalog = catalog_cum_rate[index_model]
            mean_rate_catalog = np.array(rate_in_catalog)#.mean(axis=0)
            for MFD_type in MFD_type_list :  
                for scenario in scenarios_names_list :
                    for b_value in b_value_list :  
                        for BG_hyp in BG_hyp_list :  
                            for ScL in ScL_complet_list :  
                                rows_model = np.where(np.array(total_list_model) == model)[0]  
                                rows_mfd = np.where(np.array(total_list_MFD_type) == MFD_type)[0] 
                                rows_sc = np.where(np.array(total_list_scenario_name) == scenario)[0]  
                                rows_ScL = np.where(np.array(total_list_complet_ScL) == ScL)[0]  
                                rows_b = np.where(np.array(total_list_b_value) == b_value)[0]  
                                rows_bg = np.where(np.array(total_list_BG_hyp) == BG_hyp)[0]  
                                rows = list(set(rows_model).intersection(rows_mfd)) 
                                rows = list(set(rows).intersection(rows_sc)) 
                                rows = list(set(rows).intersection(rows_ScL)) 
                                rows = list(set(rows).intersection(rows_b)) 
                                rows = list(set(rows).intersection(rows_bg)) 
                                if len(rows) > 0:
                                    file_branch_to_catalog.write(str(model)+'\t')
                                    file_branch_to_catalog.write(str(MFD_type)+'\t')
                                    file_branch_to_catalog.write(str(scenario)+'\t')
                                    file_branch_to_catalog.write(str(b_value)+'\t')
                                    file_branch_to_catalog.write(str(BG_hyp)+'\t')
                                    file_branch_to_catalog.write(str(ScL)+'\t')
                                    mfd_X = []
                                    for index in rows :  
                                        mfd = mega_mfd_cummulative[index]
                                        mfd_X.append(mfd)
                                    mean_rate_model = np.array(mfd_X).mean(axis=0)
                                    for i in range(len(mean_rate_catalog)):
                                        file_branch_to_catalog.write(str(mean_rate_model[i]/mean_rate_catalog[i]-1.)+'\t')
                                    file_branch_to_catalog.write('\n')
            index_model +=1
        file_branch_to_catalog.close()

    '''##########################################
    #plot mfd for each MFD shape hypothesis and scenario set
    ############################################'''        
          
    if plot_mfd == True and plot_mfd_detailled == True:
        if len(MFD_type_list) > 1 and len(scenarios_names_list)>1:
            index_model = 0
            for model in Model_list : 
                rate_in_catalog = catalog_cum_rate[index_model]
                for MFD_type in MFD_type_list :  
                    for scenario in scenarios_names_list :
                        if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model):
                            os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model)
                        if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/' + MFD_type):
                            os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/' + MFD_type)
                        if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/' + MFD_type+ '/' +scenario):
                            os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/' + MFD_type+ '/' +scenario)
                        rows_mfd = np.where(np.array(total_list_MFD_type) == MFD_type)[0] 
                        rows_sc = np.where(np.array(total_list_scenario_name) == scenario)[0]  
                        rows_i = list(set(rows_mfd).intersection(rows_sc)) 
                        rows_model = np.where(np.array(total_list_model) == model)[0]  
                        rows = list(set(rows_i).intersection(rows_model)) 
                        mfd_X = []
                        for index in rows :  
                            mfd = mega_mfd_cummulative[index]
                            mfd_X.append(mfd)
                            
                       #density plot
                        if plot_mfd == True :   
                            hyp_name =  model + ' ' + MFD_type + ' ' + scenario
                            path = str(Run_name) +'/analysis/figures/analyze_branches/Model/' + model+ '/' + MFD_type+ '/' +scenario
                            do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)
                index_model +=1
        
        
        
    '''##########################################
    #plot mfd for each background hypothesis and scenario set
    ############################################'''        
          
    if plot_mfd == True and plot_mfd_detailled == True:
        if len(BG_hyp_list) > 1 and len(scenarios_names_list)>1:
            index_model = 0
            for model in Model_list : 
                rate_in_catalog = catalog_cum_rate[index_model]
                for BG_hyp in BG_hyp_list :  
                    for scenario in scenarios_names_list :
                        if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/'  + BG_hyp+ '/' +scenario):
                            os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/'  + BG_hyp+ '/' +scenario)
                        rows_mfd = np.where(np.array(total_list_BG_hyp) == BG_hyp)[0] 
                        rows_sc = np.where(np.array(total_list_scenario_name) == scenario)[0]  
                        rows = list(set(rows_mfd).intersection(rows_sc)) 
                        rows_model = np.where(np.array(total_list_model) == model)[0]  
                        rows = list(set(rows).intersection(rows_model)) 
                        mfd_X = []
                        for index in rows :  
                            mfd = mega_mfd_cummulative[index]
                            mfd_X.append(mfd)
                            
                       #density plot
                        if plot_mfd == True :
                            hyp_name = BG_hyp + ' ' + scenario
                            path = str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/'  + BG_hyp+ '/' +scenario
                            #total_list_hyp = total_list_MFD_type
                            do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)
                index_model +=1
        
        
        
    '''##########################################
    #plot mfd for each model hypothesis and MFD
    ############################################'''        
          
          
    if plot_mfd == True and plot_mfd_detailled == True:
        if len(Model_list) > 1 and len(MFD_type_list)>1:
            index_model = 0
            for model in Model_list :  
                rate_in_catalog = catalog_cum_rate[index_model]
                for MFD_type in MFD_type_list :
                    if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/' +MFD_type):
                        os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/' +MFD_type)
                    rows_i = np.where(np.array(total_list_model) == model)[0] 
                    rows_j = np.where(np.array(total_list_MFD_type) == MFD_type)[0]  
                    rows = list(set(rows_i).intersection(rows_j)) 
                    mfd_X = []
                    for index in rows :  
                        mfd = mega_mfd_cummulative[index]
                        mfd_X.append(mfd)
                        
                   #density plot
                    if plot_mfd == True :
                        hyp_name = model + ' ' + MFD_type
                        path = str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/' +MFD_type
                        #total_list_hyp = total_list_MFD_type
                        do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)
                index_model +=1        
            
                
                
    '''##########################################
    #plot mfd for each background hypothesis and mfd
    ############################################'''        
          
    if plot_mfd == True and plot_mfd_detailled == True:
        if len(BG_hyp_list) > 1 and len(MFD_type_list)>1:
            index_model = 0
            for model in Model_list : 
                rate_in_catalog = catalog_cum_rate[index_model]
                for BG_hyp in BG_hyp_list :  
                    for MFD_type in MFD_type_list :
                        if not os.path.exists(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/'  + BG_hyp+ '/' +MFD_type):
                            os.makedirs(str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/'  + BG_hyp+ '/' +MFD_type)
                        rows_i = np.where(np.array(total_list_BG_hyp) == BG_hyp)[0] 
                        rows_j = np.where(np.array(total_list_MFD_type) == MFD_type)[0]  
                        rows = list(set(rows_i).intersection(rows_j)) 
                        rows_model = np.where(np.array(total_list_model) == model)[0]  
                        rows = list(set(rows).intersection(rows_model)) 
                        mfd_X = []
                        for index in rows :  
                            mfd = mega_mfd_cummulative[index]
                            mfd_X.append(mfd)
                            
                       #density plot
                        if plot_mfd == True :
                            hyp_name = BG_hyp + ' ' + MFD_type
                            path = str(Run_name) + '/analysis/figures/analyze_branches/Model/' + model+ '/'+ BG_hyp+ '/' +MFD_type
                            #total_list_hyp = total_list_MFD_type
                            do_the_plots(hyp_name,mfd_X,mega_bining_in_mag,xmin,xmax,ymin,ymax,Run_name,rate_in_catalog,plot_as_rep,a_s_model,rows,path,bining_in_mag)
                index_model +=1        
            
            
    return (total_list_ScL,total_list_dimension_used,geologic_moment_rate,
            geologic_moment_rate_no_as,total_list_scenario_name,total_list_MFD_type,
            mega_mfd_cummulative,total_list_model,total_list_sample,total_list_BG_hyp)
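
The selection pattern used throughout the function above appears once per hypothesis combination: each logic-tree attribute lives in a parallel list, np.where finds the rows matching one hypothesis, and intersecting the index sets narrows the candidates before the selected cumulative MFDs are averaged. Below is a minimal sketch of that idiom, with made-up toy lists standing in for total_list_model, total_list_MFD_type and mega_mfd_cummulative:

import numpy as np

# hypothetical parallel branch lists (one entry per logic-tree branch)
models    = ['M1', 'M1', 'M1', 'M2']
mfd_types = ['GR', 'YC', 'GR', 'GR']
mfds      = [np.array([1.0, 0.5]), np.array([4.0, 2.0]),
             np.array([3.0, 1.5]), np.array([8.0, 4.0])]

# rows matching each hypothesis, then the intersection of the index sets
rows_model = np.where(np.array(models) == 'M1')[0]
rows_mfd   = np.where(np.array(mfd_types) == 'GR')[0]
rows = sorted(set(rows_model).intersection(rows_mfd))   # branches 0 and 2

# mean cumulative MFD over the selected branches
mean_rate_model = np.array([mfds[i] for i in rows]).mean(axis=0)
print(mean_rate_model)   # [2. 1.]
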
Beispiel #34
0
tif_ds.SetProjection(src_ds.GetProjection())
tif_ds.SetGeoTransform(src_ds.GetGeoTransform())
if src_ds.GetGCPCount() > 0:
    tif_ds.SetGCPs(src_ds.GetGCPs(), src_ds.GetGCPProjection())

# ----------------------------------------------------------------------------
# Do the processing one scanline at a time.

progress(0.0)
for iY in range(src_ds.RasterYSize):
    src_data = src_band.ReadAsArray(0, iY, src_ds.RasterXSize, 1)

    for iBand in range(out_bands):
        band_lookup = lookup[iBand]

        dst_data = Numeric.take(band_lookup, src_data)
        tif_ds.GetRasterBand(iBand + 1).WriteArray(dst_data, 0, iY)

    progress((iY + 1.0) / src_ds.RasterYSize)

tif_ds = None

# ----------------------------------------------------------------------------
# Translate intermediate file to output format if desired format is not TIFF.

if tif_filename != dst_filename:
    tif_ds = gdal.Open(tif_filename)
    dst_driver.CreateCopy(dst_filename, tif_ds)
    tif_ds = None

    gtiff_driver.Delete(tif_filename)
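
The scanline loop above is, at its core, a colour lookup table applied with take: each pixel value indexes into the per-band table. A NumPy-only sketch of that step, with a hypothetical 256-entry table and a fake one-row scanline in place of the GDAL I/O:

import numpy as np

# hypothetical 256-entry lookup table for one output band (inverted grey values)
band_lookup = np.arange(256, dtype=np.uint8)[::-1]

# fake one-row scanline of pixel values, shape (1, width)
src_data = np.array([[0, 10, 128, 255]], dtype=np.uint8)

# np.take maps every pixel through the table and preserves the scanline shape
dst_data = np.take(band_lookup, src_data)
print(dst_data)    # [[255 245 127   0]]
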
Beispiel #35
0
def onp_take(x, indices):
    a = onp.take(x, indices)
    b = onp.take(x, indices, axis=-1)
    c = onp.take(x, indices, axis=0, mode='wrap')
    d = onp.take(x, indices, axis=1, mode='clip')
    return a, b, c, d
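
The four calls above exercise the flat, per-axis and out-of-range behaviour of take. Assuming onp is plain NumPy, a concrete illustration of what the 'wrap' and 'clip' modes do to indices that fall outside the axis:

import numpy as onp

x = onp.arange(6).reshape(2, 3)      # [[0 1 2], [3 4 5]]
indices = [0, 4]

print(onp.take(x, indices))                           # flat view: [0 4]
print(onp.take(x, indices, axis=-1, mode='clip'))     # index 4 clipped to 2: [[0 2], [3 5]]
print(onp.take(x, indices, axis=0, mode='wrap'))      # 4 % 2 == 0, so row 0 twice
print(onp.take(x, [0, 1], axis=1, mode='clip'))       # first two columns: [[0 1], [3 4]]
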
Beispiel #36
0
def make_SED(m, par, model, DIG=False):
    # set up the SEDs and images
    
    if DIG:
        dig_str = "_DIG"
    else:
        dig_str = ""

    if cfg.par.SED_MONOCHROMATIC == True:

        # since all sources have the same spectrum just take the nu
        # from the input SED from the first source

        monochromatic_nu = m.sources[0].spectrum['nu'] * u.Hz
        monochromatic_lam = (constants.c / monochromatic_nu).to(u.micron).value[::-1]

        if cfg.par.FIX_SED_MONOCHROMATIC_WAVELENGTHS == True:
            # idx = np.round(np.linspace(np.min(np.where(monochromatic_lam > cfg.par.SED_MONOCHROMATIC_min_lam)[0]),\
            ##                           np.max(np.where(monochromatic_lam < cfg.par.SED_MONOCHROMATIC_max_lam)[0]),\
            #                           cfg.par.SED_MONOCHROMATIC_nlam))

            idx = np.where((monochromatic_lam > cfg.par.SED_MONOCHROMATIC_min_lam) & (
                        monochromatic_lam < cfg.par.SED_MONOCHROMATIC_max_lam))[0]
            monochromatic_lam = np.take(monochromatic_lam, list(idx))
        m.set_monochromatic(True, wavelengths=monochromatic_lam)
        m.set_raytracing(True)
        m.set_n_photons(initial=par.n_photons_initial,
                        imaging_sources=par.n_photons_imaging,
                        imaging_dust=par.n_photons_imaging,
                        raytracing_sources=par.n_photons_raytracing_sources,
                        raytracing_dust=par.n_photons_raytracing_dust)

        m.set_n_initial_iterations(3)
        m.set_convergence(True, percentile=99., absolute=1.01, relative=1.01)
        sed = m.add_peeled_images(sed=True, image=False)

        if cfg.par.MANUAL_ORIENTATION == True:
            sed.set_viewing_angles(np.array(cfg.par.THETA), np.array(cfg.par.PHI))

        else:
            sed.set_viewing_angles(np.linspace(0, 90, par.NTHETA).tolist() * par.NPHI,
                                   np.repeat(np.linspace(0, 90, par.NPHI), par.NPHI))
        sed.set_track_origin('basic')

        if cfg.par.SKIP_RT == False:
            m.write(model.inputfile + '.sed', overwrite=True)
            m.run(model.outputfile + str(dig_str) + '.sed', mpi=True, n_processes=par.n_MPI_processes, overwrite=True)

        print(
            '[pd_front_end]: Beginning RT Stage: Calculating SED using a monochromatic spectrum equal to the input SED')

    else:

        m.set_raytracing(True)
        m.set_n_photons(initial=par.n_photons_initial, imaging=par.n_photons_imaging,
                        raytracing_sources=par.n_photons_raytracing_sources,
                        raytracing_dust=par.n_photons_raytracing_dust)
        m.set_n_initial_iterations(7)
        m.set_convergence(True, percentile=99., absolute=1.01, relative=1.01)

        sed = m.add_peeled_images(sed=True, image=False)
        sed.set_wavelength_range(2500, 0.001, 1000.)

        if cfg.par.MANUAL_ORIENTATION == True:
            sed.set_viewing_angles(np.array(cfg.par.THETA), np.array(cfg.par.PHI))
        else:
            sed.set_viewing_angles(np.linspace(0, 90, par.NTHETA).tolist(
            ) * par.NPHI, np.repeat(np.linspace(0, 90, par.NPHI), par.NPHI))
        sed.set_track_origin('basic')

        print('[pd_front_end]: Beginning RT Stage: Calculating SED using a binned spectrum')

        # Run the Model
        if cfg.par.SKIP_RT == False:
            m.write(model.inputfile + '.sed', overwrite=True)
            m.run(model.outputfile + '.sed', mpi=True,
                  n_processes=par.n_MPI_processes, overwrite=True)
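
In the monochromatic branch above, the wavelength grid is first windowed: np.where collects the indices of wavelengths between SED_MONOCHROMATIC_min_lam and SED_MONOCHROMATIC_max_lam, and np.take gathers those wavelengths before they are passed to set_monochromatic. A toy version of just that step, with hypothetical limits:

import numpy as np

monochromatic_lam = np.array([0.05, 0.1, 1.0, 10.0, 500.0, 2000.0])  # microns
min_lam, max_lam = 0.09, 1000.0                                      # hypothetical window

idx = np.where((monochromatic_lam > min_lam) & (monochromatic_lam < max_lam))[0]
windowed = np.take(monochromatic_lam, list(idx))
print(windowed)    # 0.1, 1.0, 10.0 and 500.0 microns survive the cut
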
  def apply(self,
            inputs_q,
            inputs_kv,
            num_heads,
            dtype=jnp.float32,
            qkv_features=None,
            out_features=None,
            attention_axis=None,
            causal_mask=False,
            padding_mask=None,
            key_padding_mask=None,
            segmentation=None,
            key_segmentation=None,
            cache=None,
            broadcast_dropout=True,
            dropout_rng=None,
            dropout_rate=0.,
            deterministic=False,
            precision=None,
            kernel_init=default_kernel_init,
            bias_init=zeros,
            bias=True
            ):
    """Applies multi-head dot product attention on the input data.

    Projects the inputs into multi-headed query, key, and value vectors,
    applies dot-product attention and project the results to an output vector.

    This can be used for encoder-decoder attention by specifying both `inputs_q`
    and `inputs_kv`, or for self-attention by only specifying `inputs_q` and
    setting `inputs_kv` to None.

    Args:
      inputs_q: input queries of shape `[bs, dim1, dim2, ..., dimN, features]`.
      inputs_kv: key/values of shape `[bs, dim1, dim2, ..., dimN, features]`
        or None for self-attention, in which case key/values will be derived
        from inputs_q.
      num_heads: number of attention heads. Features (i.e. inputs_q.shape[-1])
        should be divisible by the number of heads.
      dtype: the dtype of the computation (default: float32)
      qkv_features: dimension of the key, query, and value.
      out_features: dimension of the last projection
      attention_axis: axes over which the attention is applied (`None` means
        attention over all axes except batch, heads, and features).
      causal_mask: boolean specifying whether to apply a causal mask on the
        attention weights. If True, the output at timestep `t` will not depend
        on inputs at timesteps strictly greater than `t`.
      padding_mask: boolean specifying query tokens that are pad token w/ False.
      key_padding_mask: boolean specifying key-value tokens that are pad token
        w/ False.
      segmentation: segment indices for packed inputs_q data.
      key_segmentation: segment indices for packed inputs_kv data.
      cache: an instance of `flax.nn.attention.Cache` used for efficient
        autoregressive decoding.
      broadcast_dropout: bool: use a broadcasted dropout along batch dims.
      dropout_rng: JAX PRNGKey: to be used for dropout
      dropout_rate: dropout rate
      deterministic: bool, deterministic or not (to apply dropout)
      precision: numerical precision of the computation see `jax.lax.Precision`
        for details.
      kernel_init: initializer for the kernel of the Dense layers.
      bias_init: initializer for the bias of the Dense layers.
      bias: bool: whether pointwise QKVO dense transforms use bias.
      attention_fn: dot_product_attention or compatible function. Accepts
        query, key, value, and returns output of shape
        `[bs, dim1, dim2, ..., dimN, num_heads, value_channels]`.

    Returns:
      output of shape `[bs, dim1, dim2, ..., dimN, features]`.
    """
    
    assert causal_mask or not cache, (
        'Caching is only supported for causal attention.')

    if inputs_kv is None:
      inputs_kv = inputs_q

    is_self_attention = inputs_kv is inputs_q

    if attention_axis is None:
      attention_axis = tuple(range(1, inputs_q.ndim - 1))

    features = out_features or inputs_q.shape[-1]
    qkv_features = qkv_features or inputs_q.shape[-1]

    assert qkv_features % num_heads == 0, (
        'Memory dimension must be divisible by number of heads.')
    head_dim = qkv_features // num_heads

    dense = DenseGeneral.partial(
        axis=-1,
        features=(num_heads, head_dim),
        kernel_init=kernel_init,
        bias_init=bias_init,
        bias=bias,
        precision=precision)
    # project inputs_q to multi-headed q/k/v
    # dimensions are then [bs, dims..., n_heads, n_features_per_head]
    query, key, value = (dense(inputs_q, dtype=dtype, name='query'),
                         dense(inputs_kv, dtype=dtype, name='key'),
                         dense(inputs_kv, dtype=dtype, name='value'))

    if cache:
      assert isinstance(cache, Cache), 'cache must be an instance of Cache'
      if self.is_initializing():
        cache.store(np.array((key.ndim,) + key.shape[-2:], dtype=np.int32))
      else:
        cache_entry = cache.retrieve(None)
        expected_shape = list(cache_entry.key.shape[:-2])
        for attn_dim in attention_axis:
          expected_shape[attn_dim] = 1
        expected_shape = tuple(expected_shape) + inputs_q.shape[-1:]
        if expected_shape != inputs_q.shape:
          raise ValueError('Invalid shape provided, '
                           'expected shape %s instead got %s.' %
                           (expected_shape, inputs_q.shape))

        if not isinstance(cache_entry, _CacheEntry):
          raise ValueError('Cache is not initialized.')

        cshape = cache_entry.key.shape
        indices = [0] * len(cshape)
        i = cache_entry.i
        attn_size = np.prod(np.take(cshape, attention_axis))
        for attn_dim in attention_axis:
          attn_size //= cshape[attn_dim]
          indices[attn_dim] = i // attn_size
          i = i % attn_size

        key = lax.dynamic_update_slice(cache_entry.key, key, indices)
        value = lax.dynamic_update_slice(cache_entry.value, value, indices)
        one = jnp.array(1, jnp.uint32)
        cache_entry = cache_entry.replace(i=cache_entry.i + one,
                                          key=key,
                                          value=value)
        cache.store(cache_entry)

    # create attention masks
    mask_components = []

    if causal_mask:
      if cache and not self.is_initializing():
        bias_pre_shape = (1,) * (key.ndim - 1)
        attn_shape = tuple(np.take(key.shape, attention_axis))
        attn_size = np.prod(attn_shape)
        ii = jnp.arange(attn_size, dtype=jnp.uint32)
        mask = ii < cache_entry.i
        mask_components.append(mask.reshape(bias_pre_shape + attn_shape))
      else:
        mask_components.append(_make_causal_mask(key, attention_axis))

    if (padding_mask is not None or key_padding_mask is not None) and not cache:
      if key_padding_mask is None:
        if is_self_attention:
          key_padding_mask = padding_mask
        else:
          key_padding_shape = [inputs_kv.shape[dim] for dim in attention_axis]
          key_padding_mask = jnp.full(key_padding_shape, True)
      if padding_mask is None:
        if is_self_attention:
          padding_mask = key_padding_mask
        else:
          padding_shape = [inputs_q.shape[dim] for dim in attention_axis]
          padding_mask = jnp.full(padding_shape, True)

      padding_mask = make_padding_mask(
          padding_mask_query=padding_mask,
          padding_mask_key=key_padding_mask,
          query_shape=query.shape,
          key_shape=key.shape,
          attention_axis=attention_axis)
      mask_components.append(padding_mask)

    if segmentation is not None:
      if key_segmentation is None:
        assert is_self_attention
        key_segmentation = segmentation
      segmentation_mask = make_padding_mask(
          padding_mask_query=segmentation,
          padding_mask_key=key_segmentation,
          query_shape=query.shape,
          key_shape=key.shape,
          attention_axis=attention_axis,
          segmentation_mask=True)
      mask_components.append(segmentation_mask)

    if mask_components:
      attention_mask = mask_components[0]
      for component in mask_components[1:]:
        attention_mask = jnp.logical_and(attention_mask, component)

      # attention mask in the form of attention bias
      attention_bias = lax.select(
          attention_mask > 0, jnp.full(attention_mask.shape, 0.).astype(dtype),
          jnp.full(attention_mask.shape, -1e10).astype(dtype))
    else:
      attention_bias = None    
    # apply attention
    x = self.fast_unstruct_rfm_dot_product_attention.dot_product_attention(
        query,
        key,
        value,
        dtype=dtype,
        axis=attention_axis,
        bias=attention_bias,
        precision=precision,
        dropout_rng=dropout_rng,
        dropout_rate=dropout_rate,
        broadcast_dropout=broadcast_dropout,
        deterministic=deterministic)

    # back to the original inputs dimensions
    out = DenseGeneral(
        x,
        features=features,
        axis=(-2, -1),
        kernel_init=kernel_init,
        bias_init=bias_init,
        bias=bias,
        dtype=dtype,
        precision=precision,
        name='out')

    return out
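
In the caching and causal-mask branches above, np.take is applied to a shape tuple rather than to data: np.take(cshape, attention_axis) picks out the sizes of the attention axes, their product is the flattened attention length, and integer division recovers the per-axis position of the current cache index. A small sketch of that bookkeeping with hypothetical dimensions:

import numpy as np

# hypothetical cached key shape: (batch, height, width, heads, head_dim)
cshape = (2, 5, 7, 8, 64)
attention_axis = (1, 2)

attn_shape = tuple(np.take(cshape, attention_axis))   # sizes of the attention axes: 5 and 7
attn_size = int(np.prod(attn_shape))                  # 35 flattened attention positions

# decode a flat cache position i into per-axis indices (row-major order)
i = 12
indices = [0] * len(cshape)
for attn_dim in attention_axis:
    attn_size //= cshape[attn_dim]
    indices[attn_dim] = i // attn_size
    i = i % attn_size
print(indices)     # [0, 1, 5, 0, 0] -> flat position 12 is row 1, column 5
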
              for_training=False,
              grad_req='null',
              shared_module=net)

begin = time.time()
for epoch in range(100):
    avg_cost = 0
    total_batch = int(math.ceil(dataX.shape[0] / batch_size))
    shuffle_ind = np.random.permutation(np.arange(dataX.shape[0]))
    dataX = dataX[shuffle_ind, :]
    dataY = dataY[shuffle_ind]
    for i in range(total_batch):
        # Slice the data batch and target batch.
        # Note that we use np.take to ensure that the batch will be padded correctly.
        data_npy = np.take(dataX,
                           indices=np.arange(i * batch_size, (i+1) * batch_size),
                           axis=0,
                           mode="clip")
        target_npy = np.take(dataY,
                             indices=np.arange(i * batch_size, (i + 1) * batch_size),
                             axis=0,
                             mode="clip")
        net.forward_backward(data_batch=mx.io.DataBatch(data=[nd.array(data_npy)],
                                                        label=[nd.array(target_npy)]))
        loss = net.get_outputs()[0].asscalar()
        avg_cost += loss / total_batch
        net.update()
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
print('Learning Finished!')
end = time.time()
print("Total Time Spent: %gs" %(end - begin))
Beispiel #39
0
def load_and_augment_data(dataset_name, model_params):
    """
    From datasets.CIFAR10:
        dataset.data: the image as numpy array, shape: (50000, 32, 32, 3)
        dataset.targets: labels of the images as list, len: 50000
    :return:
        augmented_labeled_X: the tensor of augmented labeled images (K=1),
                             size: (n_labeled_per_class * n_classes , 32, 32, 3)
        augmented_unlabeled_X: the tensor of augmented unlabeled images (K=2),
                             size: ((N/10 - n_labeled_per_class - n_validation) * n_classes * K , 32, 32, 3)
        train_labeled_targets: the tensor of labeled targets,
                             size = n_labeled_per_class * n_classes
        train_unlabeled_targets: the tensor of unlabeled targets,
                             size = (N/10 - n_labeled_per_class - n_validation) * n_classes
    """

    # Step 1: Set the model's hyperparameters
    n_classes = model_params["n_classes"]
    n_labeled_per_class = model_params["n_labeled_per_class"]
    n_validation = model_params["n_validation"]
    K = model_params["K"]

    # Step 2: Load the dataset
    if dataset_name == 'CIFAR10':
        dataset = datasets.CIFAR10(root="./datasets",
                                   train=True,
                                   download=True)
    elif dataset_name == 'SLT10':
        dataset = datasets.STL10(root="./datasets", download=True)
    else:
        raise ValueError("Invalid dataset name")

    # Step 3: Split the indexes
    train_labeled_indexes, train_unlabeled_indexes, validation_indexes = \
        split_indexes(n_classes, n_labeled_per_class, n_validation, dataset.targets)

    # Step 4: Extract the images for training, validation
    train_labeled_images = np.take(dataset.data, train_labeled_indexes, axis=0)
    train_unlabeled_images = np.take(dataset.data,
                                     train_unlabeled_indexes,
                                     axis=0)
    target_array = np.asarray(dataset.targets)
    train_labeled_targets = np.take(target_array,
                                    train_labeled_indexes,
                                    axis=0)
    train_unlabeled_targets = np.take(target_array,
                                      train_unlabeled_indexes,
                                      axis=0)
    validation_images = np.take(dataset.data, validation_indexes, axis=0)
    validation_targets = np.take(target_array, validation_indexes, axis=0)

    # Step 5: Normalise the datasets
    train_labeled_images = normalise(train_labeled_images)
    train_unlabeled_images = normalise(train_unlabeled_images)

    # Step 6: Augment training images
    augmented_labeled_X = augment(train_labeled_images, K=1)
    augmented_unlabeled_X = augment(train_unlabeled_images, K=K)

    # Take a look at some of the augmented images
    # displayImages(train_labeled_images[:10], title1="Original-Labeled", title2="Augmented-Labeled",
    #               augmented_images=augmented_labeled_X[:10], labels=train_labeled_targets[:10])
    # n_unlabeled = train_unlabeled_images.shape[0]
    # displayImages(train_unlabeled_images[:10], title1="Original-Unlabeled", title2="Augmented-Unlabeled",
    #               augmented_images=augmented_unlabeled_X[:10], labels=train_unlabeled_targets[:10])
    # displayImages(augmented_unlabeled_X[:10], title1="Augmented-Unlabeled1", title2="Augmented-Unlabeled2",
    #               augmented_images=augmented_unlabeled_X[n_unlabeled:10+n_unlabeled],
    #               labels=train_unlabeled_targets[:10])

    # Step 7: Change the dimension of np.array in order for it to work with torch
    augmented_labeled_X = to_tensor_dim(augmented_labeled_X)
    augmented_unlabeled_X = to_tensor_dim(augmented_unlabeled_X)
    validation_images = to_tensor_dim(validation_images)

    return torch.from_numpy(augmented_labeled_X), torch.from_numpy(augmented_unlabeled_X), \
           torch.from_numpy(train_labeled_targets), torch.from_numpy(train_unlabeled_targets), \
           torch.from_numpy(validation_images), torch.from_numpy(validation_targets)
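
Step 4 above gathers images and targets with np.take along axis=0; for an integer index list this matches fancy indexing, and reusing the same index list for data and targets keeps the two selections aligned. A small sketch with hypothetical arrays:

import numpy as np

data = np.arange(12).reshape(6, 2)          # 6 "images" with 2 values each
targets = np.asarray([0, 1, 0, 2, 1, 2])
labeled_indexes = [1, 3, 5]

labeled_images = np.take(data, labeled_indexes, axis=0)
labeled_targets = np.take(targets, labeled_indexes, axis=0)

# same result as fancy indexing, and the rows stay paired with their labels
assert (labeled_images == data[labeled_indexes]).all()
print(labeled_targets)    # [1 2 2]
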
Beispiel #40
0
    def __next__(self, batch_size=None):
        """Generate each mini-batch.
        Args:
            batch_size (int, optional): the size of mini-batch
        Returns:
            A tuple of `(inputs, labels_main, labels_sub, inputs_seq_len, input_names)`
                inputs: list of input data of size
                    `[B, T, input_dim]`
                labels_main: list of target labels in the main task, of size
                    `[B, T]`
                labels_sub: list of target labels in the sub task, of size
                    `[B, T]`
                inputs_seq_len: list of length of inputs of size
                    `[B]`
                input_names: list of file name of input data of size
                    `[B]`
            is_new_epoch (bool): If true, one epoch is finished
        """
        if self.max_epoch is not None and self.epoch >= self.max_epoch:
            raise StopIteration
        # NOTE: max_epoch = None means infinite loop

        if batch_size is None:
            batch_size = self.batch_size

        # reset
        if self.is_new_epoch:
            self.is_new_epoch = False

        if self.sort_utt:
            # Sort all utterances by length
            if len(self.rest) > batch_size:
                data_indices = sorted(list(self.rest))[:batch_size]
                self.rest -= set(data_indices)
                # NOTE: rest is in utterance-length order
            else:
                # Last mini-batch
                data_indices = list(self.rest)
                self.reset()
                self.is_new_epoch = True
                self.epoch += 1
                if self.epoch == self.sort_stop_epoch:
                    self.sort_utt = False

            # Shuffle data in the mini-batch
            random.shuffle(data_indices)

        elif self.shuffle:
            # Randomly sample utterances
            if len(self.rest) > batch_size:
                data_indices = random.sample(list(self.rest), batch_size)
                self.rest -= set(data_indices)
            else:
                # Last mini-batch
                data_indices = list(self.rest)
                self.reset()
                self.is_new_epoch = True
                self.epoch += 1

                # Shuffle selected mini-batch
                random.shuffle(data_indices)

        else:
            if len(self.rest) > batch_size:
                data_indices = sorted(list(self.rest))[:batch_size]
                self.rest -= set(data_indices)
                # NOTE: rest is in name order
            else:
                # Last mini-batch
                data_indices = list(self.rest)
                self.reset()
                self.is_new_epoch = True
                self.epoch += 1

        # Compute max frame num in mini-batch
        max_frame_num = max(
            map(lambda x: x.shape[0], self.input_list[data_indices]))

        # Compute max target label length in mini-batch
        max_seq_len_main = max(map(len, self.label_main_list[data_indices]))
        max_seq_len_sub = max(map(len, self.label_sub_list[data_indices]))

        # Initialization
        inputs = np.zeros((len(data_indices), max_frame_num,
                           self.input_list[0].shape[-1] * self.splice),
                          dtype=np.float32)
        labels_main = np.array([[self.padded_value] * max_seq_len_main] *
                               len(data_indices),
                               dtype=np.int32)
        labels_sub = np.array([[self.padded_value] * max_seq_len_sub] *
                              len(data_indices),
                              dtype=np.int32)
        inputs_seq_len = np.zeros((len(data_indices), ), dtype=np.int32)
        input_names = np.array(
            list(
                map(lambda path: basename(path).split('.')[0],
                    np.take(self.input_paths, data_indices, axis=0))))

        # Set values of each data in mini-batch
        for i_batch, x in enumerate(data_indices):
            data_i = self.input_list[x]
            frame_num, input_size = data_i.shape

            # Splicing
            data_i = data_i.reshape(1, frame_num, input_size)
            data_i = do_splice(data_i, splice=self.splice,
                               batch_size=1).reshape(frame_num, -1)

            inputs[i_batch, :frame_num, :] = data_i
            labels_main[i_batch, :len(self.label_main_list[x]
                                      )] = self.label_main_list[x]
            labels_sub[
                i_batch, :len(self.label_sub_list[x])] = self.label_sub_list[x]
            inputs_seq_len[i_batch] = frame_num

        self.iteration += len(data_indices)

        return (inputs, labels_main, labels_sub, inputs_seq_len,
                input_names), self.is_new_epoch
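
The input_names line above uses np.take to gather the paths of the sampled utterances and then strips the directory and extension with basename(...).split('.')[0]. A tiny sketch of that step with hypothetical paths:

import numpy as np
from os.path import basename

# hypothetical feature-file paths and a sampled mini-batch of indices
input_paths = np.array(['/data/feats/utt001.npy',
                        '/data/feats/utt007.npy',
                        '/data/feats/utt042.npy'])
data_indices = [2, 0]

input_names = np.array(
    list(map(lambda path: basename(path).split('.')[0],
             np.take(input_paths, data_indices, axis=0))))
print(input_names)    # ['utt042' 'utt001']
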
Beispiel #41
0
def main(random_state=1,
         test_size=0.2,
         n_instances=1000000,
         out_dir='continuous'):

    # create logger
    logger = get_logger('log.txt')

    # columns to use
    cols = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

    # data dtypes for each column
    dtypes = {c: np.float32 for c in cols}
    dtypes[0] = np.uint8

    # retrieve dataset
    start = time.time()
    df = pd.read_csv('day_0',
                     sep='\t',
                     header=None,
                     usecols=cols,
                     dtype=dtypes,
                     nrows=n_instances)
    logger.info('reading in dataset...{:.3f}s'.format(time.time() - start))
    logger.info('{}'.format(df))
    logger.info('Memory usage: {:,} bytes'.format(
        df.memory_usage(deep=True).sum()))

    # get numpy array
    X = df.values
    df = None

    # impute missing values with the mean
    logger.info('imputing missing values with the mean...')
    assert np.isnan(X[:, 0]).sum() == 0
    col_mean = np.nanmean(X, axis=0)
    nan_indices = np.where(np.isnan(X))
    X[nan_indices] = np.take(col_mean, nan_indices[1])

    # move the label column in X to the last column
    logger.info('moving label column to the last column...')
    y = X[:, 0].copy().reshape(-1, 1)
    X = np.delete(X, 0, 1)
    X = np.hstack([X, y])

    # split into train and test
    logger.info('splitting into train and test sets...')
    indices = np.arange(X.shape[0])
    n_train_samples = int(len(indices) * (1 - test_size))

    np.random.seed(random_state)
    train_indices = np.random.choice(indices,
                                     size=n_train_samples,
                                     replace=False)
    test_indices = np.setdiff1d(indices, train_indices)

    train = X[train_indices]
    test = X[test_indices]

    logger.info('train.shape: {}, label sum: {}'.format(
        train.shape, train[:, -1].sum()))
    logger.info('test.shape: {}, label sum: {}'.format(test.shape,
                                                       test[:, -1].sum()))

    # save to numpy format
    logger.info('saving...')
    os.makedirs(out_dir, exist_ok=True)
    np.save(os.path.join(out_dir, 'train.npy'), train)
    np.save(os.path.join(out_dir, 'test.npy'), test)
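
The imputation step above is a compact np.take idiom: np.where(np.isnan(X)) gives the (row, column) positions of the missing entries, and np.take(col_mean, nan_indices[1]) looks up the mean of the corresponding column for each position, so every NaN is replaced by its own column mean in one vectorised assignment. A tiny worked example:

import numpy as np

X = np.array([[1.0, np.nan, 3.0],
              [4.0, 5.0, np.nan],
              [np.nan, 7.0, 9.0]])

col_mean = np.nanmean(X, axis=0)          # [2.5, 6.0, 6.0]
nan_indices = np.where(np.isnan(X))       # rows (0, 1, 2), columns (1, 2, 0)
X[nan_indices] = np.take(col_mean, nan_indices[1])
print(X)
# [[1.  6.  3. ]
#  [4.  5.  6. ]
#  [2.5 7.  9. ]]
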
Beispiel #42
0
def make_plots(clust_CI, clust_MI, clust_MI_r, clust_params):
    '''
    Plot the CI vs MI diagram for each MI.
    '''

    # Color is associated with the dist; size with the initial mass and
    # the marker with the age.
    mrk = {7.: ('o', r'$\log(age)=7.$'), 8.: ('s', r'$\log(age)=8.$'),
        9.: ('D', r'$\log(age)=9.$')}

    # Make plot.
    plt.figure(figsize=(14, 25))  # create the top-level container
    gs = gridspec.GridSpec(4, 3, width_ratios=[1, 1, 0.05])
    xy_font_s = 21

    ax0 = plt.subplot(gs[0])
    ax0.set_title('Decontamination algorithm', fontsize=xy_font_s)
    plt.ylabel('$MI_1$', fontsize=xy_font_s)
    plt.xlim(0., 0.97)
    plt.ylim(-0.01, 0.99)
    # make these tick labels invisible
    plt.setp(ax0.get_xticklabels(), visible=False)
    # Set steps in axis.
    ax0.yaxis.set_major_locator(MultipleLocator(0.2))
    # Plot grid
    plt.grid(b=True, which='major', color='gray', linestyle='--', zorder=1)
    # Add text box with MI equation.
    text = r'$MI_1 = n_m/N_{cl}$' '\n' r'  $(MP >\,0.9)$'
    x_align, y_align = 0.57, 0.85
    plt.text(x_align, y_align, text, transform=ax0.transAxes,
             bbox=dict(facecolor='white', alpha=0.6), fontsize=(xy_font_s + 2))
    # Define color map.
    cm = plt.cm.get_cmap('RdYlBu_r')
    # Order.
    mass, age, dist = clust_params
    order = np.argsort(-np.array(mass))
    z1 = np.take((np.array(mass) / 5.), order)
    z2 = np.take(age, order)
    z3 = np.take(dist, order)
    # Order before plotting.
    x = np.take(clust_CI, order)
    y = np.take(clust_MI[0], order)
    for key, value in sorted(mrk.items()):
        s1 = (z2 == key)
        plt.scatter(x[s1], y[s1],
            marker=value[0], label=value[1],
            s=z1[s1],
            c=z3[s1], cmap=cm, lw=0.2)
    # Plot regression line.
    m, b = np.polyfit(clust_CI, clust_MI[0], 1)
    range_CI = np.linspace(0., 1., 10)
    plt.plot(range_CI, m * range_CI + b, c='k', ls='--')

    #
    # Random MI.
    ax1 = plt.subplot(gs[1])
    ax1.set_title('Random probability', fontsize=xy_font_s)
    plt.xlim(0., 0.97)
    plt.ylim(-0.01, 0.99)
    # make these tick labels invisible
    plt.setp(ax1.get_yticklabels(), visible=False)
    plt.setp(ax1.get_xticklabels(), visible=False)
    ax1.yaxis.set_major_locator(MultipleLocator(0.2))
    # Plot grid
    plt.grid(b=True, which='major', color='gray', linestyle='--', zorder=1)
    # Define color map.
    cm = plt.cm.get_cmap('RdYlBu_r')
    # Order.
    mass, age, dist = clust_params
    order = np.argsort(-np.array(mass))
    z1 = np.take((np.array(mass) / 5.), order)
    z2 = np.take(age, order)
    z3 = np.take(dist, order)
    # Order before plotting.
    x = np.take(clust_CI, order)
    y = np.take(clust_MI_r[0], order)
    for key, value in sorted(mrk.items()):
        s1 = (z2 == key)
        SC = plt.scatter(x[s1], y[s1],
            marker=value[0], label=value[1],
            s=z1[s1],
            c=z3[s1], cmap=cm, lw=0.2)
    # Plot regression line.
    m, b = np.polyfit(clust_CI, clust_MI_r[0], 1)
    range_CI = np.linspace(0., 1., 10)
    plt.plot(range_CI, m * range_CI + b, c='k', ls='--')
    # Plot legend.
    legend = plt.legend(loc="upper right", markerscale=0.7, scatterpoints=1,
        fontsize=17)
    for i in range(len(mrk)):
        legend.legendHandles[i].set_color('k')
    # Colorbar
    axp2 = plt.subplot(gs[2])
    cbar = plt.colorbar(SC, cax=axp2)
    cbar.set_ticks([0.5, 1., 3., 5.])
    cbar.set_ticklabels([0.5, 1., 3., 5.])
    cbar.set_label(r'$dist\,(kpc)$', fontsize=xy_font_s, labelpad=-15, y=0.35)

    #
    # Second MI.
    ax3 = plt.subplot(gs[3])
    plt.xlabel('$CI$', fontsize=xy_font_s)
    plt.ylabel('$MI_2$', fontsize=xy_font_s)
    plt.xlim(0., 0.97)
    plt.ylim(max(min(clust_MI[1]) - 0.1, -2.5), 0.99)
    ax3.yaxis.set_major_locator(MultipleLocator(0.4))
    # Plot grid
    plt.grid(b=True, which='major', color='gray', linestyle='--', zorder=1)
    # Add text box with MI equation.
    text = (r'$MI_2 = \frac{\left(\sum^{n_m}{p_m} - ' +
           r' \sum^{n_f}{p_f}\right)}{N_{cl}}$')
    x_align, y_align = 0.52, 0.86
    plt.text(x_align, y_align, text, transform=ax3.transAxes,
             bbox=dict(facecolor='white', alpha=0.6), fontsize=(xy_font_s + 2))
    plt.axhline(y=0., linestyle='--', color='r', zorder=3)
    # Define color map.
    cm = plt.cm.get_cmap('RdYlBu_r')
    # Order.
    mass, age, dist = clust_params
    order = np.argsort(-np.array(mass))
    z1 = np.take((np.array(mass) / 5.), order)
    z2 = np.take(age, order)
    z3 = np.take(dist, order)
    # Order before plotting.
    x = np.take(clust_CI, order)
    y = np.take(clust_MI[1], order)
    for key, value in sorted(mrk.items()):
        s1 = (z2 == key)
        plt.scatter(x[s1], y[s1],
            marker=value[0], label=value[1],
            s=z1[s1],
            c=z3[s1], cmap=cm, lw=0.2)
    # Plot regression line.
    m, b = np.polyfit(clust_CI, clust_MI[1], 1)
    range_CI = np.linspace(0., 1., 10)
    plt.plot(range_CI, m * range_CI + b, c='k', ls='--')
    plt.axhline(y=0., linestyle='--', color='r', zorder=3)

    #
    # Second random MI.
    ax4 = plt.subplot(gs[4])
    plt.xlabel('$CI$', fontsize=xy_font_s)
    plt.xlim(0., 0.97)
    plt.ylim(max(min(clust_MI[1]) - 0.1, -2.5), 0.99)
    # make these tick labels invisible
    plt.setp(ax4.get_yticklabels(), visible=False)
    ax4.yaxis.set_major_locator(MultipleLocator(0.4))
    # Plot grid
    plt.grid(b=True, which='major', color='gray', linestyle='--', zorder=1)
    plt.axhline(y=0., linestyle='--', color='r', zorder=3)
    # Define color map.
    cm = plt.cm.get_cmap('RdYlBu_r')
    # Order.
    mass, age, dist = clust_params
    order = np.argsort(-np.array(mass))
    z1 = np.take((np.array(mass) / 5.), order)
    z2 = np.take(age, order)
    z3 = np.take(dist, order)
    # Order before plotting.
    x = np.take(clust_CI, order)
    y = np.take(clust_MI_r[1], order)
    for key, value in sorted(mrk.items()):
        s1 = (z2 == key)
        plt.scatter(x[s1], y[s1],
            marker=value[0], label=value[1],
            s=z1[s1],
            c=z3[s1], cmap=cm, lw=0.2)
    # Plot regression line.
    m, b = np.polyfit(clust_CI, clust_MI_r[1], 1)
    range_CI = np.linspace(0., 1., 10)
    plt.plot(range_CI, m * range_CI + b, c='k', ls='--')
    # Colorbar
    #axp4 = plt.subplot(gs[5])
    #cbar = plt.colorbar(SC2, cax=axp4)
    #cbar.set_ticks([0.5, 1., 3., 5.])
    #cbar.set_ticklabels([0.5, 1., 3., 5.])
    #cbar.set_label('$dist\,(kpc)$', fontsize=xy_font_s, labelpad=-15, y=0.35)

    # Save to output png file.
    plt.tight_layout()
    out_png = dir_memb_files + 'MI_analisys.png'
    plt.savefig(out_png, dpi=150)
    print('Plot done.')
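
Each panel above repeats the same reordering idiom: np.argsort(-mass) gives a most-massive-first plotting order, and applying np.take with that order to every parallel array (mass, age, dist, CI, MI) keeps the clusters aligned while ensuring the largest markers are drawn first and do not hide the smaller ones. A compact sketch with made-up values:

import numpy as np

mass = [200., 50., 800.]
age  = [7., 9., 8.]
CI   = [0.3, 0.6, 0.1]

order = np.argsort(-np.array(mass))      # [2 0 1]: most massive first
sizes = np.take(np.array(mass) / 5., order)
z_age = np.take(age, order)
x_CI  = np.take(CI, order)
print(order, x_CI)    # [2 0 1] [0.1 0.3 0.6]
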
Beispiel #43
0
def load_dataset(data_dir, test_size, val_size):
    """
	Args:
		data_dir: path to folder
		test_size: test set percentage
		val_size: val set percentage

	Returns:
        dict containing a mapping from class to training, validation
		and testing set
	"""
    tot = test_size + val_size
    train_size = 100 - tot

    assert test_size >= 1, 'Test percent must be at least 1'
    assert val_size >= 1, 'Valid percent must be at least 1'
    assert test_size <= 25, 'Keep test percent below 25'
    assert val_size <= 25, 'Keep valid percent below 25'
    assert tot <= 40, 'Train on at least 60%. Current training percent {}'.format(
        train_size)

    if os.path.exists(data_dir):
        dataset = {}
        print('/{} exists'.format(data_dir))
        folders = [
            folder for folder in os.listdir(data_dir)
            if not folder == '.DS_Store'
        ]
        print(folders)
        for folder in folders:
            files = []
            files = [
                file for file in os.listdir(data_dir + '/' + folder)
                if not file == '.DS_Store'
            ]
            num_files = len(files)

            shuffled = np.random.permutation(num_files)
            n_val, n_test = int((val_size / 100) * num_files), int(
                (test_size / 100) * num_files)
            valid_idx, test_idx, train_idx = shuffled[:n_val], shuffled[
                n_val:n_val + n_test], shuffled[n_val + n_test:]

            print('{} has {} images'.format(folder, num_files))

            train_set, test_set, valid_set = [], [], []

            train_set = list(np.squeeze(list(np.take(files, train_idx))))
            test_set = list(np.squeeze(list(np.take(files, test_idx))))
            valid_set = list(np.squeeze(list(np.take(files, valid_idx))))

            dataset[folder] = {
                'train': train_set,
                'valid': valid_set,
                'test': test_set
            }

        return folders, dataset
    print('Path does not exist!!')

    return None
Beispiel #44
0
def get_multiplier(output_tensor, new_shape):
    class_binary = [[0], [1]]
    class_binary = np.asarray(class_binary, dtype=np.uint8)
    output = output_tensor.reshape(new_shape)
    output_colors = np.take(class_binary, output, axis=0)
    return output_colors
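
Here np.take with axis=0 acts as a per-pixel class lookup: every value in output is used as a row index into class_binary, so an (H, W) map of class labels becomes an (H, W, 1) array of per-class values. A tiny sketch with the same two-class table and a hypothetical 2x2 class map:

import numpy as np

class_binary = np.asarray([[0], [1]], dtype=np.uint8)    # one row per class
output = np.array([[0, 1],
                   [1, 0]])                              # hypothetical class map

output_colors = np.take(class_binary, output, axis=0)
print(output_colors.shape)    # (2, 2, 1)
print(output_colors[..., 0])
# [[0 1]
#  [1 0]]
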
Beispiel #45
0
def merge_percentiles(finalq, qs, vals, interpolation="lower", Ns=None):
    """Combine several percentile calculations of different data.

    Parameters
    ----------

    finalq : numpy.array
        Percentiles to compute (must use same scale as ``qs``).
    qs : sequence of :class:`numpy.array`s
        Percentiles calculated on different sets of data.
    vals : sequence of :class:`numpy.array`s
        Resulting values associated with percentiles ``qs``.
    Ns : sequence of integers
        The number of data elements associated with each data set.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        Specify the type of interpolation to use to calculate final
        percentiles.  For more information, see :func:`numpy.percentile`.

    Examples
    --------

    >>> finalq = [10, 20, 30, 40, 50, 60, 70, 80]
    >>> qs = [[20, 40, 60, 80], [20, 40, 60, 80]]
    >>> vals = [np.array([1, 2, 3, 4]), np.array([10, 11, 12, 13])]
    >>> Ns = [100, 100]  # Both original arrays had 100 elements

    >>> merge_percentiles(finalq, qs, vals, Ns=Ns)
    array([ 1,  2,  3,  4, 10, 11, 12, 13])
    """
    from .utils import array_safe, empty_like_safe

    if isinstance(finalq, Iterator):
        finalq = list(finalq)
    finalq = array_safe(finalq, like=finalq)
    qs = list(map(list, qs))
    vals = list(vals)
    if Ns is None:
        vals, Ns = zip(*vals)
    Ns = list(Ns)

    L = list(zip(*[(q, val, N) for q, val, N in zip(qs, vals, Ns) if N]))
    if not L:
        raise ValueError("No non-trivial arrays found")
    qs, vals, Ns = L

    # TODO: Perform this check above in percentile once dtype checking is easy
    #       Here we silently change meaning
    if vals[0].dtype.name == "category":
        result = merge_percentiles(finalq, qs, [v.codes for v in vals],
                                   interpolation, Ns)
        import pandas as pd

        return pd.Categorical.from_codes(result, vals[0].categories,
                                         vals[0].ordered)
    if not np.issubdtype(vals[0].dtype, np.number):
        interpolation = "nearest"

    if len(vals) != len(qs) or len(Ns) != len(qs):
        raise ValueError("qs, vals, and Ns parameters must be the same length")

    # transform qs and Ns into number of observations between percentiles
    counts = []
    for q, N in zip(qs, Ns):
        count = empty_like_safe(finalq, shape=len(q))
        count[1:] = np.diff(array_safe(q, like=q[0]))
        count[0] = q[0]
        count *= N
        counts.append(count)

    # Sort by calculated percentile values, then number of observations.
    combined_vals = np.concatenate(vals)
    combined_counts = array_safe(np.concatenate(counts), like=combined_vals)
    sort_order = np.argsort(combined_vals)
    combined_vals = np.take(combined_vals, sort_order)
    combined_counts = np.take(combined_counts, sort_order)

    # percentile-like, but scaled by total number of observations
    combined_q = np.cumsum(combined_counts)

    # rescale finalq percentiles to match combined_q
    finalq = array_safe(finalq, like=combined_vals)
    desired_q = finalq * sum(Ns)

    # the behavior of different interpolation methods should be
    # investigated further.
    if interpolation == "linear":
        rv = np.interp(desired_q, combined_q, combined_vals)
    else:
        left = np.searchsorted(combined_q, desired_q, side="left")
        right = np.searchsorted(combined_q, desired_q, side="right") - 1
        np.minimum(left,
                   len(combined_vals) - 1, left)  # don't exceed max index
        lower = np.minimum(left, right)
        upper = np.maximum(left, right)
        if interpolation == "lower":
            rv = combined_vals[lower]
        elif interpolation == "higher":
            rv = combined_vals[upper]
        elif interpolation == "midpoint":
            rv = 0.5 * (combined_vals[lower] + combined_vals[upper])
        elif interpolation == "nearest":
            lower_residual = np.abs(combined_q[lower] - desired_q)
            upper_residual = np.abs(combined_q[upper] - desired_q)
            mask = lower_residual > upper_residual
            index = lower  # alias; we no longer need lower
            index[mask] = upper[mask]
            rv = combined_vals[index]
        else:
            raise ValueError("interpolation can only be 'linear', 'lower', "
                             "'higher', 'midpoint', or 'nearest'")
    return rv
Beispiel #46
0
def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                 target_column='default-target', cache=True, return_X_y=False):
    """Fetch dataset from openml by name or dataset id.

    Datasets are uniquely identified by either an integer ID or by a
    combination of name and version (i.e. there might be multiple
    versions of the 'iris' dataset). Please give either name or data_id
    (not both). In case a name is given, a version can also be
    provided.

    Read more in the :ref:`User Guide <openml>`.

    .. note:: EXPERIMENTAL

        The API is experimental in version 0.20 (particularly the return value
        structure), and might have small backward-incompatible changes in
        future releases.

    Parameters
    ----------
    name : str or None
        String identifier of the dataset. Note that OpenML can have multiple
        datasets with the same name.

    version : integer or 'active', default='active'
        Version of the dataset. Can only be provided if also ``name`` is given.
        If 'active' the oldest version that's still active is used. Since
        there may be more than one active version of a dataset, and those
        versions may fundamentally be different from one another, setting an
        exact version is highly recommended.

    data_id : int or None
        OpenML ID of the dataset. The most specific way of retrieving a
        dataset. If data_id is not given, name (and potential version) are
        used to obtain a dataset.

    data_home : string or None, default None
        Specify another download and cache folder for the data sets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    target_column : string, list or None, default 'default-target'
        Specify the column name in the data to use as target. If
        'default-target', the standard target column as stored on the server
        is used. If ``None``, all columns are returned as data and the
        target is ``None``. If list (of strings), all columns with these names
        are returned as multi-target (Note: not all scikit-learn classifiers
        can handle all types of multi-output combinations)

    cache : boolean, default=True
        Whether to cache downloaded datasets using joblib.

    return_X_y : boolean, default=False.
        If True, returns ``(data, target)`` instead of a Bunch object. See
        below for more information about the `data` and `target` objects.

    Returns
    -------

    data : Bunch
        Dictionary-like object, with attributes:

        data : np.array or scipy.sparse.csr_matrix of floats
            The feature matrix. Categorical features are encoded as ordinals.
        target : np.array
            The regression target or classification labels, if applicable.
            Dtype is float if numeric, and object if categorical.
        DESCR : str
            The full description of the dataset
        feature_names : list
            The names of the dataset columns
        categories : dict
            Maps each categorical feature name to a list of values, such
            that the value encoded as i is ith in the list.
        details : dict
            More metadata from OpenML

    (data, target) : tuple if ``return_X_y`` is True

        .. note:: EXPERIMENTAL

            This interface is **experimental** as at version 0.20 and
            subsequent releases may change attributes without notice
            (although there should only be minor changes to ``data``
            and ``target``).

        Missing values in the 'data' are represented as NaN's. Missing values
        in 'target' are represented as NaN's (numerical target) or None
        (categorical target)
    """
    data_home = get_data_home(data_home=data_home)
    data_home = join(data_home, 'openml')
    if cache is False:
        # no caching will be applied
        data_home = None

    # check valid function arguments. data_id XOR (name, version) should be
    # provided
    if name is not None:
        # OpenML is case-insensitive, but the caching mechanism is not
        # convert all data names (str) to lower case
        name = name.lower()
        if data_id is not None:
            raise ValueError(
                "Dataset data_id={} and name={} passed, but you can only "
                "specify a numeric data_id or a name, not "
                "both.".format(data_id, name))
        data_info = _get_data_info_by_name(name, version, data_home)
        data_id = data_info['did']
    elif data_id is not None:
        # from the previous if statement, it is given that name is None
        if version != "active":
            raise ValueError(
                "Dataset data_id={} and version={} passed, but you can only "
                "specify a numeric data_id or a version, not "
                "both.".format(data_id, version))
    else:
        raise ValueError(
            "Neither name nor data_id are provided. Please provide name or "
            "data_id.")

    data_description = _get_data_description_by_id(data_id, data_home)
    if data_description['status'] != "active":
        warn("Version {} of dataset {} is inactive, meaning that issues have "
             "been found in the dataset. Try using a newer version from "
             "this URL: {}".format(
                data_description['version'],
                data_description['name'],
                data_description['url']))
    if 'error' in data_description:
        warn("OpenML registered a problem with the dataset. It might be "
             "unusable. Error: {}".format(data_description['error']))
    if 'warning' in data_description:
        warn("OpenML raised a warning on the dataset. It might be "
             "unusable. Warning: {}".format(data_description['warning']))

    # download data features, meta-info about column types
    features_list = _get_data_features(data_id, data_home)

    for feature in features_list:
        if 'true' in (feature['is_ignore'], feature['is_row_identifier']):
            continue
        if feature['data_type'] == 'string':
            raise ValueError('STRING attributes are not yet supported')

    if target_column == "default-target":
        # determines the default target based on the data feature results
        # (which is currently more reliable than the data description;
        # see issue: https://github.com/openml/OpenML/issues/768)
        target_column = [feature['name'] for feature in features_list
                         if feature['is_target'] == 'true']
    elif isinstance(target_column, string_types):
        # for code-simplicity, make target_column by default a list
        target_column = [target_column]
    elif target_column is None:
        target_column = []
    elif not isinstance(target_column, list):
        raise TypeError("Did not recognize type of target_column"
                        "Should be six.string_type, list or None. Got: "
                        "{}".format(type(target_column)))
    data_columns = _valid_data_column_names(features_list,
                                            target_column)

    # prepare which columns and data types should be returned for the X and y
    features_dict = {feature['name']: feature for feature in features_list}

    # XXX: col_slice_y should be all nominal or all numeric
    _verify_target_data_type(features_dict, target_column)

    col_slice_y = [int(features_dict[col_name]['index'])
                   for col_name in target_column]

    col_slice_x = [int(features_dict[col_name]['index'])
                   for col_name in data_columns]
    for col_idx in col_slice_y:
        feat = features_list[col_idx]
        nr_missing = int(feat['number_of_missing_values'])
        if nr_missing > 0:
            raise ValueError('Target column {} has {} missing values. '
                             'Missing values are not supported for target '
                             'columns. '.format(feat['name'], nr_missing))

    # determine arff encoding to return
    return_sparse = False
    if data_description['format'].lower() == 'sparse_arff':
        return_sparse = True

    # obtain the data
    arff = _download_data_arff(data_description['file_id'], return_sparse,
                               data_home)
    arff_data = arff['data']
    # nominal attributes is a dict mapping from the attribute name to the
    # possible values. Includes also the target column (which will be popped
    # off below, before it will be packed in the Bunch object)
    nominal_attributes = {k: v for k, v in arff['attributes']
                          if isinstance(v, list) and
                          k in data_columns + target_column}

    X, y = _convert_arff_data(arff_data, col_slice_x, col_slice_y)

    is_classification = {col_name in nominal_attributes
                         for col_name in target_column}
    if not is_classification:
        # No target
        pass
    elif all(is_classification):
        y = np.hstack([np.take(np.asarray(nominal_attributes.pop(col_name),
                                          dtype='O'),
                               y[:, i:i+1].astype(int))
                       for i, col_name in enumerate(target_column)])
    elif any(is_classification):
        raise ValueError('Mix of nominal and non-nominal targets is not '
                         'currently supported')

    description = u"{}\n\nDownloaded from openml.org.".format(
        data_description.pop('description'))

    # reshape y back to 1-D array, if there is only 1 target column; back
    # to None if there are no target columns
    if y.shape[1] == 1:
        y = y.reshape((-1,))
    elif y.shape[1] == 0:
        y = None

    if return_X_y:
        return X, y

    bunch = Bunch(
        data=X, target=y, feature_names=data_columns,
        DESCR=description, details=data_description,
        categories=nominal_attributes,
        url="https://www.openml.org/d/{}".format(data_id))

    return bunch
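A minimal usage sketch for the fetch_openml wrapper documented above. It assumes scikit-learn >= 0.20 is installed and that openml.org is reachable; the dataset name 'iris', version 1, and data_id 61 are illustrative values, and exact return types have varied across scikit-learn releases.

from sklearn.datasets import fetch_openml

# Fetch by name; pinning an explicit version is recommended (see docstring).
iris = fetch_openml(name='iris', version=1)
print(iris.data.shape, iris.target.shape)

# Fetch by data_id and unpack directly into (X, y).
X, y = fetch_openml(data_id=61, return_X_y=True)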
Beispiel #47
0
    def __call__(self, image, boxes, classes, crop_h_=None, crop_w_=None):
        if len(boxes) == 0:
            return image, boxes, classes

        h, w, _ = np.shape(image)
        gt_bbox = boxes

        # NOTE Original method attempts to generate one candidate for each
        # threshold then randomly sample one from the resulting list.
        # Here a short circuit approach is taken, i.e., randomly choose a
        # threshold and attempt to find a valid crop, and simply return the
        # first one found.
        # The resulting probability is not exactly the same, somewhat like the
        # "Monty Hall" problem: actually carrying out the attempts would affect
        # what is observed (just like opening doors in the "Monty Hall" game).
        thresholds = list(self.thresholds)
        if self.allow_no_crop:
            thresholds.append('no_crop')
        np.random.shuffle(thresholds)

        for thresh in thresholds:
            if thresh == 'no_crop':
                return image, boxes, classes

            found = False
            for i in range(self.num_attempts):
                scale = np.random.uniform(*self.scaling)
                min_ar, max_ar = self.aspect_ratio
                aspect_ratio = np.random.uniform(max(min_ar, scale**2),
                                                 min(max_ar, scale**-2))
                if crop_h_ is not None:
                    crop_h = min(crop_h_, h)
                else:
                    crop_h = int(h * scale / np.sqrt(aspect_ratio))

                if crop_w_ is not None:
                    crop_w = min(crop_w_, w)
                else:
                    crop_w = int(w * scale * np.sqrt(aspect_ratio))

                if h > crop_h:
                    crop_y = np.random.randint(0, h - crop_h)
                else:
                    crop_y = 0
                if w > crop_w:
                    crop_x = np.random.randint(0, w - crop_w)
                else:
                    crop_x = 0
                crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
                iou = self._iou_matrix(gt_bbox,
                                       np.array([crop_box], dtype=np.float32))
                if iou.max() < thresh:
                    continue

                if self.cover_all_box and iou.min() < thresh:
                    continue

                cropped_box, valid_ids = self._crop_box_with_center_constraint(
                    gt_bbox, np.array(crop_box, dtype=np.float32))
                if valid_ids.size > 0:
                    found = True
                    break
            if found:
                image = self._crop_image(image, crop_box)
                boxes = np.take(cropped_box, valid_ids, axis=0)
                classes = np.take(classes, valid_ids, axis=0)
                #sample['w'] = crop_box[2] - crop_box[0]
                #sample['h'] = crop_box[3] - crop_box[1]
                return image, boxes, classes

        return image, boxes, classes
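Standalone sketch of the box-filtering step used in the crop above: once a crop is accepted, np.take with axis=0 gathers only the surviving ground-truth boxes and their class labels. The arrays below are toy data.

import numpy as np

boxes = np.array([[10, 10, 50, 50],
                  [60, 60, 90, 90],
                  [5, 5, 20, 20]], dtype=np.float32)
classes = np.array([1, 2, 1])
valid_ids = np.array([0, 2])  # indices of boxes that survive the crop

kept_boxes = np.take(boxes, valid_ids, axis=0)
kept_classes = np.take(classes, valid_ids, axis=0)
print(kept_boxes.shape, kept_classes)  # (2, 4) [1 1]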
Beispiel #48
0
    def generate_knntriplets(self, X, k_genuine, k_impostor):
        """
    Generates triplets from labeled data.

    For every point (X_a) the triplets (X_a, X_b, X_c) are constructed from all
    the combinations of taking one of its `k_genuine`-nearest neighbors of the
    same class (X_b) and taking one of its `k_impostor`-nearest neighbors of
    other classes (X_c).

    In the case a class doesn't have enough points in the same class (other
    classes) to yield `k_genuine` (`k_impostor`) neighbors a warning will be
    raised and the maximum value of genuine (impostor) neighbors will be used
    for that class.

    Parameters
    ----------
      X : (n x d) matrix
        Input data, where each row corresponds to a single instance.
      k_genuine : int
        Number of neighbors of the same class to be taken into account.
      k_impostor : int
        Number of neighbors of different classes to be taken into account.

    Returns
    -------
    triplets : array-like, shape=(n_constraints, 3)
      2D array of triplets of indices.
    """
        # Ignore unlabeled samples
        known_labels_mask = self.partial_labels >= 0
        known_labels = self.partial_labels[known_labels_mask]
        X = X[known_labels_mask]

        labels, labels_count = np.unique(known_labels, return_counts=True)
        len_input = known_labels.shape[0]

        # Handle the case where there are too few elements to yield k_genuine or
        # k_impostor neighbors for every class.

        k_genuine_vec = np.full_like(labels, k_genuine)
        k_impostor_vec = np.full_like(labels, k_impostor)

        for i, count in enumerate(labels_count):
            if k_genuine + 1 > count:
                k_genuine_vec[i] = count - 1
                warnings.warn(
                    "The class {} has {} elements, which is not sufficient "
                    "to generate {} genuine neighbors as specified by "
                    "k_genuine. Will generate {} genuine neighbors instead."
                    "\n".format(labels[i], count, k_genuine + 1,
                                k_genuine_vec[i]))
            if k_impostor > len_input - count:
                k_impostor_vec[i] = len_input - count
                warnings.warn(
                    "The class {} has {} elements of other classes, which is"
                    " not sufficient to generate {} impostor neighbors as "
                    "specified by k_impostor. Will generate {} impostor "
                    "neighbors instead.\n".format(labels[i], k_impostor_vec[i],
                                                  k_impostor,
                                                  k_impostor_vec[i]))

        # The total number of possible triplets combinations per label comes from
        # taking one of the k_genuine_vec[i] genuine neighbors and one of the
        # k_impostor_vec[i] impostor neighbors for the labels_count[i] elements
        comb_per_label = labels_count * k_genuine_vec * k_impostor_vec

        # Get start and finish for later triplet assigning
        # append zero at the beginning for start and get cumulative sum
        start_finish_indices = np.hstack((0, comb_per_label)).cumsum()

        # Total number of triplets is the sum of all possible combinations per
        # label
        num_triplets = start_finish_indices[-1]
        triplets = np.empty((num_triplets, 3), dtype=np.intp)

        neigh = NearestNeighbors()

        for i, label in enumerate(labels):

            # generate mask for current label
            gen_mask = known_labels == label
            gen_indx = np.where(gen_mask)

            # get k_genuine genuine neighbors
            neigh.fit(X=X[gen_indx])
            # Take elements of gen_indx according to the yielded k-neighbors
            gen_relative_indx = neigh.kneighbors(n_neighbors=k_genuine_vec[i],
                                                 return_distance=False)
            gen_neigh = np.take(gen_indx, gen_relative_indx)

            # generate mask for impostors of current label
            imp_indx = np.where(~gen_mask)

            # get k_impostor impostor neighbors
            neigh.fit(X=X[imp_indx])
            # Take elements of imp_indx according to the yielded k-neighbors
            imp_relative_indx = neigh.kneighbors(n_neighbors=k_impostor_vec[i],
                                                 X=X[gen_mask],
                                                 return_distance=False)
            imp_neigh = np.take(imp_indx, imp_relative_indx)

            # length = len_label*k_genuine*k_impostor
            start, finish = start_finish_indices[i:i + 2]

            triplets[start:finish, :] = comb(gen_indx, gen_neigh, imp_neigh,
                                             k_genuine_vec[i],
                                             k_impostor_vec[i])

        return triplets
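A small sketch, with synthetic data, of the index-mapping trick used above: kneighbors returns indices relative to the array passed to fit, and np.take maps them back to absolute row indices of X.

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.random.RandomState(0).rand(10, 2)
labels = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

gen_mask = labels == 0
gen_indx = np.where(gen_mask)            # absolute indices of class 0

neigh = NearestNeighbors().fit(X[gen_indx])
rel = neigh.kneighbors(n_neighbors=2, return_distance=False)
gen_neigh = np.take(gen_indx, rel)       # absolute row indices, shape (5, 2)
print(gen_neigh)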
Beispiel #49
0
    def generate(s):
        ms_set = np.take(self.elements, s, axis=0)
        return self.model_class(ms_set)
Beispiel #50
0
fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1],
        linestyle='--',
        lw=2,
        color='r',
        label='Chance',
        alpha=.8)
val_labels_list = []
val_predicted_prob_list = []
for i, (train_indices,
        val_indices) in enumerate(kfold_cv.split(train_val_text)):

    print("Fold {} of outer crossvalidation".format(i))

    # Split the train and validation set
    train_text = np.take(train_val_text, train_indices)
    train_labels = np.take(train_val_labels, train_indices)

    val_text = np.take(train_val_text, val_indices)
    val_labels = np.take(train_val_labels, val_indices)

    # A distilled version of BERT with fewer parameters is used, since we do
    # not have a lot of data. Preprocessing from text to numeric data is done
    # in the code below in a way designed for the BERT algorithm.
    print("Preprocessing data")
    tf.autograph.set_verbosity(0)
    bert_model = 'distilbert-base-uncased'
    t = ktrain_text.Transformer(bert_model, maxlen=500, class_names=[0, 1])
    train_preprocessed = t.preprocess_train(train_text, train_labels)
    val_preprocessed = t.preprocess_test(val_text, val_labels)
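For reference, the fold-splitting step above in isolation: for 1-D arrays, np.take(a, indices) is equivalent to fancy indexing a[indices]. The arrays and fold count below are toy values standing in for train_val_text and train_val_labels.

import numpy as np
from sklearn.model_selection import KFold

texts = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
labels = np.array([0, 1, 0, 1, 0, 1])

for fold, (train_idx, val_idx) in enumerate(KFold(n_splits=3).split(texts)):
    train_text = np.take(texts, train_idx)
    val_labels = np.take(labels, val_idx)
    assert np.array_equal(train_text, texts[train_idx])
    print(fold, val_labels)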
Beispiel #51
0
def group_adjust(in_val, in_groups, in_weights):
    """
    Calculate a group adjustment (demean).

    Parameters
    ----------

    vals    : List of floats/ints

        The original values to adjust

    groups  : List of Lists

        A list of groups. Each group will be a list of ints

    weights : List of floats

        A list of weights for the groupings.

    Returns
    -------

    A list-like demeaned version of the input values
    """

    vals = np.asarray(in_val, dtype=float)
    groups = np.asarray(in_groups)
    weights = np.asarray(in_weights)

    # check that the number of groups equals the number of weights
    if (len(groups) != len(weights)):
        raise ValueError("Exception Not Same Size of groups and weights")

    # check that each group has the same number of elements as vals
    for i in range(len(groups)):
        if (len(groups[i]) != len(vals)):
            raise ValueError(
                "Exception Not Same # of elements in vals and groups")

    group_index = 0

    # initialize with the initial values in vals

    demeaned = np.asarray(in_val, dtype=float)

    # iterate over groups
    for each_group in groups:
        # get the count of each unique item in the group
        unique, counts = np.unique(each_group, return_counts=True)
        # dictionary: key = group item, value = frequency
        uni_dict = dict(zip(unique, counts))

        for key in uni_dict:

            # get list of positions for each key
            pos_list = np.where(each_group == key)[0]
            # extract values for positions that match
            value_from_pos = np.take(vals, pos_list)

            freq = uni_dict[key]

            # Check for None/np.NaN
            nan_pos = np.argwhere(np.isnan(value_from_pos))
            if (len(nan_pos) > 0):
                freq = freq - len(nan_pos)
            # sum
            total = np.nansum(value_from_pos)
            # mean
            means = np.true_divide(float(total), float(freq))

            weighted_means = np.multiply(float(means),
                                         float(weights[group_index]))

            demeaned[pos_list] -= float(weighted_means)

        group_index += 1

    return demeaned
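An illustrative call to group_adjust above; the values, groupings, and weights are arbitrary example data.

vals = [1.0, 2.0, 3.0, 8.0, 5.0]
groups = [[1, 1, 1, 2, 2],
          [10, 10, 20, 20, 20]]
weights = [0.3, 0.7]

adjusted = group_adjust(vals, groups, weights)
print(adjusted)  # each value minus the weighted sum of its group means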
def histogram_enhancement(im, etype='linear2', target=None, maxCount=255, showHistogram=False, userInputs=False):
    import numpy
    import matplotlib.pyplot as plot

    # Extra arguments showHistogram == True: program will display histogram/CDF from original and modified image
    # userInputs == True: allows you to input user-specified values for certain modification types, such as the
    # cutoff range for the linear2 histogram modification, and whether to do rolled color channels versus individual
    # color channels for histogram matching.
    shape_im = im.shape
    shape_target = target.shape if target is not None else None
    histogramFlag = 'rolled'
    # Determine whether the given image is a 2D greyscale array or a 3D array.
    # If 2D, convert it to a 3D greyscale array for ease of calculation.
    if len(shape_im) == 2:
        shape_im_3D = (shape_im[0], shape_im[1], 3)
        im3D = numpy.zeros(shape_im_3D)
        for n in range(0, 3):
            im3D[:, :, n] = im
        im = im3D
    # Do the same for the target image, if one was provided.
    if shape_target is not None and len(shape_target) == 2:
        shape_target_3D = (shape_target[0], shape_target[1], 3)
        target3D = numpy.zeros(shape_target_3D)
        for n in range(0, 3):
            target3D[:, :, n] = target
        target = target3D
    im = im.astype(int)
    # compute original image histograms
    num_bins = maxCount + 1
    counts, bin_edges = numpy.histogram(im, bins=num_bins, range=(0, maxCount), density=False)
    im_pdf = counts / im.size
    im_cdf = numpy.cumsum(counts) / im.size

    if etype == 'linear1':
        # compute rise/run to find slope, where rise = desired range (0,255) and run is current range (DCmin,DCmax).
        # then use slope to find y intercept and come up with a linear LUT
        rise = maxCount
        run = int(numpy.max(im)) - int(numpy.min(im))
        slope = rise / run
        b = 0 - (slope * int(numpy.min(im)))
        # build the base LUT in the form LUT = slope * (0:255) + b
        LUT = numpy.linspace(0, maxCount, maxCount + 1) * slope + b
        LUT = LUT.astype(int)
        for n in range(0, maxCount + 1):  # clipping function for the LUT
            if LUT[n] >= maxCount:
                LUT[n] = maxCount
            elif LUT[n] <= 0:
                LUT[n] = 0
        output = numpy.take(LUT, im)  # numpy.take is a very fast LUT applicator.
        # Output as uint8 for display. This would need to change if we were
        # outputting to a different bit depth.
        output = output.astype(numpy.uint8)
    elif etype == 'linear2':
        # We need to find the cutoff values. Best way to do this is to subtract the cutoff percentiles (I will do 5 and 95)
        # from the CDF LUT, take the absolute value, and then take the minimum.
        if userInputs == True:  # userInputs flag allows one to specify the boundary if desired.
            trimamount = float(input("Specify the percentage for boundary cutoff (for example, 5% --> 0.05): "))
        else:
            trimamount = 0.02  # The amount being trimmed off either histogram.
        input_lo = trimamount
        input_hi = 1 - trimamount

        CDF_locut = abs(im_cdf - input_lo)
        CDF_hicut = abs(im_cdf - input_hi)
        CDF_locut = numpy.ndarray.tolist(CDF_locut)
        CDF_hicut = numpy.ndarray.tolist(CDF_hicut)

        pos_locut = CDF_locut.index(min(CDF_locut))
        pos_hicut = CDF_hicut.index(min(CDF_hicut))

        rise = maxCount
        run = pos_hicut - pos_locut  # Run is the index of the lowcut and highcut CVs
        # from here on out, same as linear1
        slope = rise / run
        b = 0 - (slope * int(numpy.min(im)))
        LUT = numpy.linspace(0, maxCount, maxCount + 1) * (slope) + b
        LUT = LUT.astype(int)
        n = 0
        for n in range(0, maxCount + 1):
            if LUT[n] >= maxCount:
                LUT[n] = maxCount
            elif LUT[n] <= 0:
                LUT[n] = 0
            n = n + 1

        output = numpy.take(LUT, im)
        output = output.astype(numpy.uint8)

    elif etype == 'equalize':
        # Start by dividing out maximum bit depth to scale between 0 and 1
        LUT = (numpy.linspace(0, 1, maxCount + 1) * im_cdf) * maxCount  # scale by the CDF, then return to 0-255 scale
        LUT = LUT.astype(numpy.uint8)
        output = numpy.take(LUT, im)
        output = output.astype(numpy.uint8)

    elif etype == 'match':
        # create finding function for array indexing
        import numpy as np
        # Found on Stack Overflow, linked here for transparency:
        # https://stackoverflow.com/questions/2566412/find-nearest-value-in-numpy-array
        # Takes an array and returns the element whose value is closest to the given value.
        def find_nearest(array, value):
            array = np.asarray(array)
            idx = (np.abs(array - value)).argmin()
            return array[idx]

        if userInputs == True:  # This specifies whether you can do independent channel matching or "rolled together" matching with one histogram.
            # Defaults to rolled together.
            histogramFlag = input(
                "Specify 'rolled' for rolled histogram matching, or 'independent' for channel-independent histogram matching:")

        if histogramFlag == 'rolled':
            # Check flag first
            if len(target.shape) >= 2:
                # This branch handles a full target image (two or more dimensions) to match histograms against.
                # Match probability from source CDF to target CDF, take the index value there. This becomes the lookup CV at the source CV at matching probability.
                source_cdf = im_cdf  # Create the original source PDF and CDF from image
                num_bins = maxCount + 1
                target_counts, bin_edges = numpy.histogram(target, bins=num_bins, range=(0, maxCount), density=False)
                bin_edges = numpy.linspace(0, maxCount, maxCount + 1)
                target_pdf = target_counts / target.size
                target_cdf = numpy.cumsum(target_pdf)
                LUT = numpy.zeros(maxCount + 1)
                nearest_target = numpy.zeros(maxCount + 1)
                n = 0
                # Use the probability desired to match to in the find_nearest function, then store this in
                # the "nearest_targets" array to find indices. Indices form the LUT.
                for n in range(0, maxCount + 1):
                    matching_probability = source_cdf[n]
                    nearest_target[n] = find_nearest(target_cdf, matching_probability)
                    n = n + 1
                target_cdf = numpy.ndarray.tolist(target_cdf)
                for n in range(0, maxCount + 1):
                    LUT[n] = target_cdf.index(nearest_target[n])
                    n = n + 1

            else:
                source_cdf = im_cdf  # Same procedure as before, but program detects that the matching image is a pre-built
                target_pdf = target  # LUT by determining the size of the array beforehand.
                target_cdf = numpy.cumsum(target_pdf)
                LUT = numpy.zeros(maxCount + 1)
                nearest_target = numpy.zeros(maxCount + 1)
                n = 0
                for n in range(0, maxCount + 1):
                    matching_probability = source_cdf[n]
                    nearest_target[n] = find_nearest(target_cdf, matching_probability)
                    n = n + 1
                target_cdf = numpy.ndarray.tolist(target_cdf)  # Converting to list such that I can index from the list
                for n in range(0, maxCount + 1):
                    LUT[n] = target_cdf.index(nearest_target[n])
                    n = n + 1
        elif histogramFlag == 'independent':  # Independent channel matching is an experiment more for myself than anything.
            if len(target.shape) >= 2:  # It has the same procedure as rolled matching, but simply computes for three
                num_bins = maxCount + 1  # independent color bands, so additional loops were nested.
                n = 0
                counts = numpy.zeros((3, maxCount + 1))
                target_counts = numpy.zeros((3, maxCount + 1))
                im_pdf = numpy.zeros((3, maxCount + 1))
                target_pdf = numpy.zeros((3, maxCount + 1))
                source_cdf = numpy.zeros((3, maxCount + 1))
                target_cdf = numpy.zeros((3, maxCount + 1))
                LUT = numpy.zeros((3, maxCount + 1))
                nearest_target = numpy.zeros((3, maxCount + 1))
                for n in range(0, 3):
                    counts[n], bin_edges = numpy.histogram(im[:, :, n], bins=num_bins, range=(0, maxCount),
                                                           density=False)
                    im_pdf[n] = counts[n] / (im.size / 3)
                    source_cdf[n] = numpy.cumsum(counts[n]) / (im.size / 3)
                    n = n + 1
                for n in range(0, 3):
                    target_counts[n], bin_edges = numpy.histogram(target[:, :, n], bins=num_bins, range=(0, maxCount),
                                                                  density=False)
                    target_pdf[n] = target_counts[n] / (target.size / 3)
                    target_cdf[n] = numpy.cumsum(target_pdf[n])
                    n = n + 1

                for n in range(0, 3):
                    cdf_list = target_cdf
                    for m in range(0, maxCount + 1):
                        matching_probability = source_cdf[n, m]
                        nearest_target[n, m] = find_nearest(target_cdf[n, :], matching_probability)
                        m = m + 1
                    cdf_list = numpy.ndarray.tolist(cdf_list[n, :])
                    for m in range(0, maxCount + 1):
                        LUT[n, m] = cdf_list.index(nearest_target[n, m])
                        m = m + 1
                    n = n + 1
            else:
                print("Error: Must pass in image for per-channel matching to work")
                exit()
        LUT = LUT.astype(numpy.uint8)
        output = numpy.take(LUT, im)
        output = output.astype(numpy.uint8)

    # Optional: view the histograms before and after enhancement. Defaults to off.
    if showHistogram == True:
        if histogramFlag == 'independent':
            print("Still working on independent channel histograms!")
            return output
        # create output histograms for reference
        num_bins = maxCount + 1
        output_counts, bin_edges = numpy.histogram(output, bins=num_bins, range=(0, maxCount), density=False)
        bin_edges = numpy.linspace(0, maxCount, maxCount + 1)
        output_pdf = output_counts / output.size
        output_cdf = numpy.cumsum(output_counts) / output.size

        fig, axs = plot.subplots(4, 1, sharex=True)
        plot.suptitle('Distributions Before and After Enhancement', horizontalalignment='center',
                      verticalalignment='top')
        fig.subplots_adjust(hspace=0.25)

        axs[0].plot(bin_edges[0:], im_pdf, '-b', label='Plot of Original PDF')
        axs[0].set_xlim(0, 255)

        axs[1].plot(bin_edges[0:], im_cdf, '-r', label='Plot of Original CDF')
        axs[1].set_xlim(0, 255)
        axs[1].set_ylim(0, 1)

        axs[2].plot(bin_edges[0:], output_pdf, '-y', label=f'PDF enhanced via: {etype}')
        axs[2].set_xlim(0, 255)

        axs[3].plot(bin_edges[0:], output_cdf, '-g', label=f'CDF enhanced via: {etype}')
        axs[3].set_xlim(0, 255)
        axs[3].set_ylim(0, 1)
        axs[3].set_xlabel('Digital Count')

        fig.legend(loc='center right', fontsize='x-small')
        plot.show()

    return output
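The mechanism shared by every branch above is LUT application via numpy.take: the image's digital counts index directly into a 256-entry lookup table. A tiny standalone sketch with a brighten-and-clip LUT on toy data:

import numpy

im = numpy.array([[0, 10, 250],
                  [5, 128, 255]], dtype=int)
LUT = numpy.clip(numpy.arange(256) + 20, 0, 255)  # brighten by 20, clipped
out = numpy.take(LUT, im).astype(numpy.uint8)
print(out)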
Beispiel #53
0
    def check_fun_data(self,
                       testfunc,
                       targfunc,
                       testarval,
                       targarval,
                       targarnanval,
                       check_dtype=True,
                       empty_targfunc=None,
                       **kwargs):
        for axis in list(range(targarval.ndim)) + [None]:
            for skipna in [False, True]:
                targartempval = targarval if skipna else targarnanval
                if skipna and empty_targfunc and isna(targartempval).all():
                    targ = empty_targfunc(targartempval, axis=axis, **kwargs)
                else:
                    targ = targfunc(targartempval, axis=axis, **kwargs)

                try:
                    res = testfunc(testarval,
                                   axis=axis,
                                   skipna=skipna,
                                   **kwargs)
                    self.check_results(targ,
                                       res,
                                       axis,
                                       check_dtype=check_dtype)
                    if skipna:
                        res = testfunc(testarval, axis=axis, **kwargs)
                        self.check_results(targ,
                                           res,
                                           axis,
                                           check_dtype=check_dtype)
                    if axis is None:
                        res = testfunc(testarval, skipna=skipna, **kwargs)
                        self.check_results(targ,
                                           res,
                                           axis,
                                           check_dtype=check_dtype)
                    if skipna and axis is None:
                        res = testfunc(testarval, **kwargs)
                        self.check_results(targ,
                                           res,
                                           axis,
                                           check_dtype=check_dtype)
                except BaseException as exc:
                    exc.args += ('axis: %s of %s' % (axis, testarval.ndim - 1),
                                 'skipna: %s' % skipna, 'kwargs: %s' % kwargs)
                    raise

        if testarval.ndim <= 1:
            return

        try:
            testarval2 = np.take(testarval, 0, axis=-1)
            targarval2 = np.take(targarval, 0, axis=-1)
            targarnanval2 = np.take(targarnanval, 0, axis=-1)
        except ValueError:
            return
        self.check_fun_data(testfunc,
                            targfunc,
                            testarval2,
                            targarval2,
                            targarnanval2,
                            check_dtype=check_dtype,
                            empty_targfunc=empty_targfunc,
                            **kwargs)
def SortEigenDecomposition(e, v):
    if v.ndim < 2:
        return e, v
    else:
        perm = np.argsort(e, -1)
        return np.take(e, perm, -1), np.take(v, perm, -1)
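A quick check of SortEigenDecomposition above on a symmetric matrix: np.take with the argsort permutation reorders the eigenvalues and the matching eigenvectors along the last axis. The matrix is arbitrary example data.

import numpy as np

A = np.array([[2.0, 1.0],
              [1.0, 3.0]])
e, v = np.linalg.eigh(A)                       # ascending eigenvalues
e_sorted, v_sorted = SortEigenDecomposition(e[::-1], v[:, ::-1])
print(np.allclose(e_sorted, e))                # True: ascending order restored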
def samp_entropy(a, m, r, tau=1, relative_r=True):

    """
    Compute the sample entropy [RIC00]_ of a signal with embedding dimension `m` and delay `tau` [PYEEG]_.
    Vectorised version of the eponymous PyEEG function.
    In addition, this function can also be used to vary tau and therefore compute Multi-Scale Entropy (MSE) [COS05]_ by
    coarse graining the time series (see example below).
    By default, r is expressed relative to the standard deviation of the signal.
    :param a: a one dimensional floating-point array representing a time series.
    :type a: :class:`~numpy.ndarray` or :class:`~pyrem.time_series.Signal`
    :param m: the scale
    :type m: int
    :param r: The tolerance
    :type r: float
    :param tau: The scale for coarse graining.
    :type tau: int
    :param relative_r: whether the argument r is relative to the standard deviation. If False, an absolute value should be given for r.
    :type relative_r: bool
    :return: the sample entropy, a scalar
    :rtype: float
    Example:
    """

    if len(a) <= 2:
        return np.nan

    coarse_a = _coarse_grainning(a, tau)

    if relative_r:
        coarse_a /= np.std(coarse_a)

    embsp = _embed_seq(coarse_a, 1, m + 1)
    embsp_last = embsp[:, -1]
    embs_mini = embsp[:, :-1]

    # Buffers are preallocated chunks of memory storing temporary results.
    # see the `out` argument in the numpy *ufunc* documentation

    dist_buffer = np.zeros(embsp.shape[0] - 1, dtype=np.float32)
    subtract_buffer = np.zeros((dist_buffer.size, m), dtype=np.float32)
    in_range_buffer = np.zeros_like(dist_buffer, dtype=bool)
    sum_cm, sum_cmp = 0.0, 0.0

    # we iterate through all templates (rows), except last one.
    for i, template in enumerate(embs_mini[:-1]):

        # these are just views to the buffer arrays. to store intermediary matrices
        dist_b_view = dist_buffer[i:]
        sub_b_view = subtract_buffer[i:]
        range_b_view = in_range_buffer[i:]
        embsp_view = embsp_last[i+1:]

        # subtract the template from each subsequent row of the embedded matrix
        np.subtract(embs_mini[i+1:],  template, out=sub_b_view)
        # Absolute distance
        np.abs(sub_b_view, out=sub_b_view)
        # Maximal absolute difference between a scroll and a template is the distance
        np.max(sub_b_view, axis=1, out=dist_b_view)
        # we compare this distance to a tolerance r
        np.less_equal(dist_b_view, r, out=range_b_view)
        # score one for this template for each match
        in_range_sum = np.sum(range_b_view)
        sum_cm += in_range_sum

        # reuse the buffers for last column
        dist_b_view = dist_buffer[:in_range_sum]

        where = np.flatnonzero(range_b_view)
        dist_b_view = np.take(embsp_view, where, out=dist_b_view)
        range_b_view = in_range_buffer[range_b_view]
        # score one to TODO for each match of the last element
        dist_b_view -= embsp_last[i]
        np.abs(dist_b_view, out=dist_b_view)
        np.less_equal(dist_b_view, r, out=range_b_view)
        sum_cmp += np.sum(range_b_view)

    if sum_cm == 0 or sum_cmp == 0:
        return np.NaN

    return np.log(sum_cm/sum_cmp)
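Isolated sketch of the buffer-reuse trick in the loop above: np.take can write its result into a preallocated output array via the out argument, avoiding a fresh allocation per template. The arrays below are toy data.

import numpy as np

values = np.array([10.0, 20.0, 30.0, 40.0, 50.0], dtype=np.float32)
mask = np.array([True, False, True, False, True])
buffer = np.empty(3, dtype=np.float32)

where = np.flatnonzero(mask)
view = np.take(values, where, out=buffer)  # result written into `buffer`
print(view, buffer)                        # [10. 30. 50.] printed twice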
Beispiel #56
0
def leastsq(func,
            x0,
            args=(),
            Dfun=None,
            full_output=0,
            col_deriv=0,
            ftol=1.49012e-8,
            xtol=1.49012e-8,
            gtol=0.0,
            maxfev=0,
            epsfcn=None,
            factor=100,
            diag=None):
    """
    Minimize the sum of squares of a set of equations.

    ::

        x = arg min(sum(func(y)**2,axis=0))
                 y

    Parameters
    ----------
    func : callable
        should take at least one (possibly length N vector) argument and
        return M floating point numbers. It must not return NaNs or
        fitting might fail.
    x0 : ndarray
        The starting estimate for the minimization.
    args : tuple, optional
        Any extra arguments to func are placed in this tuple.
    Dfun : callable, optional
        A function or method to compute the Jacobian of func with derivatives
        across the rows. If this is None, the Jacobian will be estimated.
    full_output : bool, optional
        non-zero to return all optional outputs.
    col_deriv : bool, optional
        non-zero to specify that the Jacobian function computes derivatives
        down the columns (faster, because there is no transpose operation).
    ftol : float, optional
        Relative error desired in the sum of squares.
    xtol : float, optional
        Relative error desired in the approximate solution.
    gtol : float, optional
        Orthogonality desired between the function vector and the columns of
        the Jacobian.
    maxfev : int, optional
        The maximum number of calls to the function. If `Dfun` is provided
        then the default `maxfev` is 100*(N+1) where N is the number of elements
        in x0, otherwise the default `maxfev` is 200*(N+1).
    epsfcn : float, optional
        A variable used in determining a suitable step length for the forward-
        difference approximation of the Jacobian (for Dfun=None).
        Normally the actual step length will be sqrt(epsfcn)*x
        If epsfcn is less than the machine precision, it is assumed that the
        relative errors are of the order of the machine precision.
    factor : float, optional
        A parameter determining the initial step bound
        (``factor * || diag * x||``). Should be in interval ``(0.1, 100)``.
    diag : sequence, optional
        N positive entries that serve as scale factors for the variables.

    Returns
    -------
    x : ndarray
        The solution (or the result of the last iteration for an unsuccessful
        call).
    cov_x : ndarray
        Uses the fjac and ipvt optional outputs to construct an
        estimate of the jacobian around the solution. None if a
        singular matrix encountered (indicates very flat curvature in
        some direction).  This matrix must be multiplied by the
        residual variance to get the covariance of the
        parameter estimates -- see curve_fit.
    infodict : dict
        a dictionary of optional outputs with the keys:

        ``nfev``
            The number of function calls
        ``fvec``
            The function evaluated at the output
        ``fjac``
            A permutation of the R matrix of a QR
            factorization of the final approximate
            Jacobian matrix, stored column wise.
            Together with ipvt, the covariance of the
            estimate can be approximated.
        ``ipvt``
            An integer array of length N which defines
            a permutation matrix, p, such that
            fjac*p = q*r, where r is upper triangular
            with diagonal elements of nonincreasing
            magnitude. Column j of p is column ipvt(j)
            of the identity matrix.
        ``qtf``
            The vector (transpose(q) * fvec).

    mesg : str
        A string message giving information about the cause of failure.
    ier : int
        An integer flag.  If it is equal to 1, 2, 3 or 4, the solution was
        found.  Otherwise, the solution was not found. In either case, the
        optional output variable 'mesg' gives more information.

    Notes
    -----
    "leastsq" is a wrapper around MINPACK's lmdif and lmder algorithms.

    cov_x is a Jacobian approximation to the Hessian of the least squares
    objective function.
    This approximation assumes that the objective function is based on the
    difference between some observed target data (ydata) and a (non-linear)
    function of the parameters `f(xdata, params)` ::

           func(params) = ydata - f(xdata, params)

    so that the objective function is ::

           min   sum((ydata - f(xdata, params))**2, axis=0)
         params

    """
    x0 = asarray(x0).flatten()
    n = len(x0)
    if not isinstance(args, tuple):
        args = (args, )
    shape, dtype = minpack._check_func('leastsq', 'func', func, x0, args, n)
    m = shape[0]
    # if n > m:
    #     raise TypeError('Improper input: N=%s must not exceed M=%s' % (n, m))
    if epsfcn is None:
        epsfcn = finfo(dtype).eps
    if Dfun is None:
        if maxfev == 0:
            maxfev = 200 * (n + 1)
        retval = minpack._minpack._lmdif(func, x0, args, full_output, ftol,
                                         xtol, gtol, maxfev, epsfcn, factor,
                                         diag)
    else:
        if col_deriv:
            minpack._check_func('leastsq', 'Dfun', Dfun, x0, args, n, (n, m))
        else:
            minpack._check_func('leastsq', 'Dfun', Dfun, x0, args, n, (m, n))
        if maxfev == 0:
            maxfev = 100 * (n + 1)
        retval = minpack._minpack._lmder(func, Dfun, x0, args, full_output,
                                         col_deriv, ftol, xtol, gtol, maxfev,
                                         factor, diag)

    errors = {
        0: ["Improper input parameters.", TypeError],
        1: [
            "Both actual and predicted relative reductions "
            "in the sum of squares\n  are at most %f" % ftol, None
        ],
        2: [
            "The relative error between two consecutive "
            "iterates is at most %f" % xtol, None
        ],
        3: [
            "Both actual and predicted relative reductions in "
            "the sum of squares\n  are at most %f and the "
            "relative error between two consecutive "
            "iterates is at \n  most %f" % (ftol, xtol), None
        ],
        4: [
            "The cosine of the angle between func(x) and any "
            "column of the\n  Jacobian is at most %f in "
            "absolute value" % gtol, None
        ],
        5: [
            "Number of calls to function has reached "
            "maxfev = %d." % maxfev, ValueError
        ],
        6: [
            "ftol=%f is too small, no further reduction "
            "in the sum of squares\n  is possible."
            "" % ftol, ValueError
        ],
        7: [
            "xtol=%f is too small, no further improvement in "
            "the approximate\n  solution is possible." % xtol, ValueError
        ],
        8: [
            "gtol=%f is too small, func(x) is orthogonal to the "
            "columns of\n  the Jacobian to machine "
            "precision." % gtol, ValueError
        ],
        'unknown': ["Unknown error.", TypeError]
    }

    info = retval[-1]  # The FORTRAN return value

    if info not in [1, 2, 3, 4] and not full_output:
        if info in [5, 6, 7, 8]:
            minpack.warnings.warn(errors[info][0], RuntimeWarning)
        else:
            try:
                raise errors[info][1](errors[info][0])
            except KeyError:
                raise errors['unknown'][1](errors['unknown'][0])

    mesg = errors[info][0]
    if full_output:
        cov_x = None
        if info in [1, 2, 3, 4]:
            from numpy.dual import inv
            from numpy.linalg import LinAlgError
            perm = take(eye(n), retval[1]['ipvt'] - 1, 0)
            r = triu(transpose(retval[1]['fjac'])[:n, :])
            R = dot(r, perm)
            try:
                cov_x = inv(dot(transpose(R), R))
            except (LinAlgError, ValueError):
                pass
        return (retval[0], cov_x) + retval[1:-1] + (mesg, info)
    else:
        return (retval[0], info)
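The wrapper above mirrors the interface of scipy.optimize.leastsq, so the sketch below exercises the documented call pattern against SciPy's own implementation; the linear model and noisy data are illustrative only.

import numpy as np
from scipy.optimize import leastsq as scipy_leastsq

def residuals(params, x, y):
    a, b = params
    return y - (a * x + b)

x = np.linspace(0.0, 1.0, 20)
y = 3.0 * x + 1.0 + 0.01 * np.random.RandomState(0).randn(20)

popt, ier = scipy_leastsq(residuals, x0=[1.0, 0.0], args=(x, y))
print(popt, ier)  # roughly [3.0, 1.0]; ier in {1, 2, 3, 4} signals success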
Beispiel #57
0
def fdrcorrection_twostage(pvals,
                           alpha=0.05,
                           method='bky',
                           iter=False,
                           is_sorted=False):
    '''(iterated) two stage linear step-up procedure with estimation of number of true
    hypotheses

    Benjamini, Krieger and Yekutieli, procedure in Definition 6

    Parameters
    ----------
    pvals : array_like
        set of p-values of the individual tests.
    alpha : float
        error rate
    method : {'bky', 'bh'}
         see Notes for details

        'bky' : implements the procedure in Definition 6 of Benjamini, Krieger
           and Yekutieli 2006
        'bh' : implements the two stage method of Benjamini and Hochberg

    iter : bool

    Returns
    -------
    rejected : array, bool
        True if a hypothesis is rejected, False if not
    pvalue-corrected : array
        pvalues adjusted for multiple hypotheses testing to limit FDR
    m0 : int
        ntest - rej, estimated number of true hypotheses
    alpha_stages : list of floats
        A list of alphas that have been used at each stage

    Notes
    -----
    The returned corrected p-values are specific to the given alpha, they
    cannot be used for a different alpha.

    The returned corrected p-values are from the last stage of the fdr_bh
    linear step-up procedure (fdrcorrection0 with method='indep') corrected
    for the estimated fraction of true hypotheses.
    This means that the rejection decision can be obtained with
    ``pval_corrected <= alpha``, where ``alpha`` is the original significance
    level.
    (Note: This has changed from earlier versions (<0.5.0) of statsmodels.)

    BKY described several other multi-stage methods, which would be easy to implement.
    However, in their simulation the simple two-stage method (with iter=False) was the
    most robust to the presence of positive correlation.

    TODO: What should be returned?

    '''
    pvals = np.asarray(pvals)

    if not is_sorted:
        pvals_sortind = np.argsort(pvals)
        pvals = np.take(pvals, pvals_sortind)

    ntests = len(pvals)
    if method == 'bky':
        fact = (1. + alpha)
        alpha_prime = alpha / fact
    elif method == 'bh':
        fact = 1.
        alpha_prime = alpha
    else:
        raise ValueError("only 'bky' and 'bh' are available as method")

    alpha_stages = [alpha_prime]
    rej, pvalscorr = fdrcorrection(pvals,
                                   alpha=alpha_prime,
                                   method='indep',
                                   is_sorted=True)
    r1 = rej.sum()
    if (r1 == 0) or (r1 == ntests):
        return rej, pvalscorr * fact, ntests - r1, alpha_stages
    ri_old = r1

    while True:
        ntests0 = 1.0 * ntests - ri_old
        alpha_star = alpha_prime * ntests / ntests0
        alpha_stages.append(alpha_star)
        #print ntests0, alpha_star
        rej, pvalscorr = fdrcorrection(pvals,
                                       alpha=alpha_star,
                                       method='indep',
                                       is_sorted=True)
        ri = rej.sum()
        if (not iter) or ri == ri_old:
            break
        elif ri < ri_old:
            # prevent cycles and endless loops
            raise RuntimeError(" oops - shouldn't be here")
        ri_old = ri

    # make adjustment to pvalscorr to reflect estimated number of Non-Null cases
    # decision is then pvalscorr < alpha  (or <=)
    pvalscorr *= ntests0 * 1.0 / ntests
    if method == 'bky':
        pvalscorr *= (1. + alpha)

    if not is_sorted:
        pvalscorr_ = np.empty_like(pvalscorr)
        pvalscorr_[pvals_sortind] = pvalscorr
        del pvalscorr
        reject = np.empty_like(rej)
        reject[pvals_sortind] = rej
        return reject, pvalscorr_, ntests - ri, alpha_stages
    else:
        return rej, pvalscorr, ntests - ri, alpha_stages
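A hedged demo of fdrcorrection_twostage above; it assumes the fdrcorrection helper shown further below is importable alongside it and that numpy is available as np. The p-values are arbitrary example data.

import numpy as np

pvals = np.array([0.001, 0.008, 0.039, 0.041, 0.042, 0.06, 0.074, 0.205])
reject, pvals_corrected, m0, alpha_stages = fdrcorrection_twostage(
    pvals, alpha=0.05, method='bky')
print(reject)
print(m0, alpha_stages)  # estimated number of true nulls and the alphas used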
    def extract_particles(self, segmentation):
        """
        Saves particle centers into the output .star file, after dismissing regions
        that are too big to contain a particle.
        
        Args:
            segmentation: Segmentation of the micrograph into noise and particle projections.
        """
        segmentation = segmentation[self.query_size // 2 - 1:-self.query_size // 2,
                                    self.query_size // 2 - 1:-self.query_size // 2]
        labeled_segments, _ = ndimage.label(segmentation, np.ones((3, 3)))
        values, repeats = np.unique(labeled_segments, return_counts=True)

        values_to_remove = np.where(repeats > self.max_size ** 2)
        values = np.take(values, values_to_remove)
        values = np.reshape(values, (1, 1, np.prod(values.shape)), 'F')

        labeled_segments = np.reshape(labeled_segments, (labeled_segments.shape[0],
                                                         labeled_segments.shape[1], 1), 'F')
        matrix1 = np.repeat(labeled_segments, values.shape[2], 2)
        matrix2 = np.repeat(values, matrix1.shape[0], 0)
        matrix2 = np.repeat(matrix2, matrix1.shape[1], 1)

        matrix3 = np.equal(matrix1, matrix2)
        matrix4 = np.sum(matrix3, 2)

        segmentation[np.where(matrix4 == 1)] = 0
        labeled_segments, _ = ndimage.label(segmentation, np.ones((3, 3)))

        max_val = np.amax(np.reshape(labeled_segments, (np.prod(labeled_segments.shape))))
        center = center_of_mass(segmentation, labeled_segments, np.arange(1, max_val))
        center = np.rint(center)

        img = np.zeros((segmentation.shape[0], segmentation.shape[1]))
        img[center[:, 0].astype(int), center[:, 1].astype(int)] = 1
        y, x = np.ogrid[-self.moa:self.moa+1, -self.moa:self.moa+1]
        element = x*x+y*y <= self.moa * self.moa
        img = binary_dilation(img, structure=element)
        labeled_img, _ = ndimage.label(img, np.ones((3, 3)))
        values, repeats = np.unique(labeled_img, return_counts=True)
        y = np.where(repeats == np.count_nonzero(element))
        y = np.array(y)
        y = y.astype(int)
        y = np.reshape(y, (np.prod(y.shape)), 'F')
        y -= 1
        center = center[y, :]

        center = center + (self.query_size // 2 - 1) * np.ones(center.shape)
        center = center + (self.query_size // 2 - 1) * np.ones(center.shape)
        center = center + np.ones(center.shape)
        center = 2 * center
        center = center + 99 * np.ones(center.shape)

        # swap columns to align with Relion
        col_2 = center[:, 1].copy()
        center[:, 1] = center[:, 0]
        center[:, 0] = col_2[:]

        basename = os.path.basename(self.filenames)
        name_str, ext = os.path.splitext(basename)

        applepick_path = os.path.join(self.output_directory, "{}_applepick.star".format(name_str))
        with open(applepick_path, "w") as f:
            np.savetxt(f, ["data_root\n\nloop_\n_rlnCoordinateX #1\n_rlnCoordinateY #2"], fmt='%s')
            np.savetxt(f, center, fmt='%d %d')
            
        return center
Beispiel #59
0
def fdrcorrection(pvals, alpha=0.05, method='indep', is_sorted=False):
    '''pvalue correction for false discovery rate

    This covers Benjamini/Hochberg for independent or positively correlated and
    Benjamini/Yekutieli for general or negatively correlated tests. Both are
    available in the function multipletests, as method=`fdr_bh`, resp. `fdr_by`.

    Parameters
    ----------
    pvals : array_like
        set of p-values of the individual tests.
    alpha : float
        error rate
    method : {'indep', 'negcorr'}

    Returns
    -------
    rejected : array, bool
        True if a hypothesis is rejected, False if not
    pvalue-corrected : array
        pvalues adjusted for multiple hypothesis testing to limit FDR

    Notes
    -----

    If there is prior information on the fraction of true hypotheses, then alpha
    should be set to alpha * m/m_0 where m is the number of tests,
    given by the p-values, and m_0 is an estimate of the number of true hypotheses.
    (see Benjamini, Krieger and Yekutieli)

    The two-step method of Benjamini, Krieger and Yekutieli that estimates the number
    of false hypotheses will be available (soon).

    Method names can be abbreviated to first letter, 'i' or 'p' for fdr_bh and 'n' for
    fdr_by.



    '''
    pvals = np.asarray(pvals)

    if not is_sorted:
        pvals_sortind = np.argsort(pvals)
        pvals_sorted = np.take(pvals, pvals_sortind)
    else:
        pvals_sorted = pvals  # alias

    if method in ['i', 'indep', 'p', 'poscorr']:
        ecdffactor = _ecdf(pvals_sorted)
    elif method in ['n', 'negcorr']:
        cm = np.sum(1. / np.arange(1, len(pvals_sorted) + 1))  #corrected this
        ecdffactor = _ecdf(pvals_sorted) / cm


##    elif method in ['n', 'negcorr']:
##        cm = np.sum(np.arange(len(pvals)))
##        ecdffactor = ecdf(pvals_sorted)/cm
    else:
        raise ValueError('only indep and negcorr implemented')
    reject = pvals_sorted <= ecdffactor * alpha
    if reject.any():
        rejectmax = max(np.nonzero(reject)[0])
        reject[:rejectmax] = True

    pvals_corrected_raw = pvals_sorted / ecdffactor
    pvals_corrected = np.minimum.accumulate(pvals_corrected_raw[::-1])[::-1]
    del pvals_corrected_raw
    pvals_corrected[pvals_corrected > 1] = 1
    if not is_sorted:
        pvals_corrected_ = np.empty_like(pvals_corrected)
        pvals_corrected_[pvals_sortind] = pvals_corrected
        del pvals_corrected
        reject_ = np.empty_like(reject)
        reject_[pvals_sortind] = reject
        return reject_, pvals_corrected_
    else:
        return reject, pvals_corrected
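The function above matches statsmodels' public API, so a quick demo can be run against the installed package (assuming statsmodels is available); the p-values are arbitrary example data.

import numpy as np
from statsmodels.stats.multitest import fdrcorrection

pvals = np.array([0.02, 0.001, 0.3, 0.045])
reject, pvals_corrected = fdrcorrection(pvals, alpha=0.05, method='indep')
print(reject)           # rejection decisions, returned in the original order
print(pvals_corrected)  # Benjamini/Hochberg adjusted p-values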
Beispiel #60
0
def multipletests(pvals,
                  alpha=0.05,
                  method='hs',
                  is_sorted=False,
                  returnsorted=False):
    '''test results and p-value correction for multiple tests


    Parameters
    ----------
    pvals : array_like
        uncorrected p-values
    alpha : float
        FWER, family-wise error rate, e.g. 0.1
    method : string
        Method used for testing and adjustment of pvalues. Can be either the
        full name or initial letters. Available methods are ::

        `bonferroni` : one-step correction
        `sidak` : one-step correction
        `holm-sidak` : step down method using Sidak adjustments
        `holm` : step-down method using Bonferroni adjustments
        `simes-hochberg` : step-up method  (independent)
        `hommel` : closed method based on Simes tests (non-negative)
        `fdr_bh` : Benjamini/Hochberg  (non-negative)
        `fdr_by` : Benjamini/Yekutieli (negative)
        `fdr_tsbh` : two stage fdr correction (non-negative)
        `fdr_tsbky` : two stage fdr correction (non-negative)

    is_sorted : bool
        If False (default), the p_values will be sorted, but the corrected
        pvalues are in the original order. If True, then it assumed that the
        pvalues are already sorted in ascending order.
    returnsorted : bool
         not tested, return sorted p-values instead of original sequence

    Returns
    -------
    reject : array, boolean
        true for hypothesis that can be rejected for given alpha
    pvals_corrected : array
        p-values corrected for multiple tests
    alphacSidak: float
        corrected alpha for Sidak method
    alphacBonf: float
        corrected alpha for Bonferroni method

    Notes
    -----
    There may be API changes for this function in the future.

    Except for 'fdr_twostage', the p-value correction is independent of the
    alpha specified as argument. In these cases the corrected p-values
    can also be compared with a different alpha. In the case of 'fdr_twostage',
    the corrected p-values are specific to the given alpha, see
    ``fdrcorrection_twostage``.

    The 'fdr_gbs' procedure is not verified against another package, p-values
    are derived from scratch and are not derived in the reference. In Monte
    Carlo experiments the method worked correctly and maintained the false
    discovery rate.

    All procedures that are included control FWER or FDR in the independent
    case, and most are robust in the positively correlated case.

    `fdr_gbs`: high power, fdr control for independent case and only small
    violation in positively correlated case

    **Timing**:

    With large arrays, most of the time is spent in `argsort`. When corrected
    p-values are needed for several methods, it is more efficient to presort
    the p-values and put the results back into the original order outside of
    the function.

    Method='hommel' is very slow for large arrays, since it requires the
    evaluation of n partitions, where n is the number of p-values.
    '''
    import gc
    pvals = np.asarray(pvals)
    alphaf = alpha  # family-wise alpha, kept under a separate name

    if not is_sorted:
        sortind = np.argsort(pvals)
        pvals = np.take(pvals, sortind)

    ntests = len(pvals)
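    # one-step corrected significance levels (used by the 'sidak' and
    # 'bonferroni' branches below and returned to the caller):
    #   Sidak:      alphacSidak = 1 - (1 - alpha)**(1/ntests)
    #   Bonferroni: alphacBonf  = alpha / ntests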
    alphacSidak = 1 - np.power((1. - alphaf), 1. / ntests)
    alphacBonf = alphaf / float(ntests)
    if method.lower() in ['b', 'bonf', 'bonferroni']:
        reject = pvals <= alphacBonf
        pvals_corrected = pvals * float(ntests)

    elif method.lower() in ['s', 'sidak']:
        reject = pvals <= alphacSidak
        pvals_corrected = 1 - np.power((1. - pvals), ntests)

    elif method.lower() in ['hs', 'holm-sidak']:
        alphacSidak_all = 1 - np.power(
            (1. - alphaf), 1. / np.arange(ntests, 0, -1))
        notreject = pvals > alphacSidak_all
        del alphacSidak_all

        nr_index = np.nonzero(notreject)[0]
        if nr_index.size == 0:
            # notreject is empty: all hypotheses are rejected
            notrejectmin = len(pvals)
        else:
            notrejectmin = np.min(nr_index)
        notreject[notrejectmin:] = True
        reject = ~notreject
        del notreject

        pvals_corrected_raw = 1 - np.power(
            (1. - pvals), np.arange(ntests, 0, -1))
        pvals_corrected = np.maximum.accumulate(pvals_corrected_raw)
        del pvals_corrected_raw

    elif method.lower() in ['h', 'holm']:
        notreject = pvals > alphaf / np.arange(ntests, 0, -1)
        nr_index = np.nonzero(notreject)[0]
        if nr_index.size == 0:
            # notreject is empty: all hypotheses are rejected
            notrejectmin = len(pvals)
        else:
            notrejectmin = np.min(nr_index)
        notreject[notrejectmin:] = True
        reject = ~notreject
        pvals_corrected_raw = pvals * np.arange(ntests, 0, -1)
        pvals_corrected = np.maximum.accumulate(pvals_corrected_raw)
        del pvals_corrected_raw
        gc.collect()

    elif method.lower() in ['sh', 'simes-hochberg']:
        alphash = alphaf / np.arange(ntests, 0, -1)
        reject = pvals <= alphash
        rejind = np.nonzero(reject)[0]
        if rejind.size > 0:
            rejectmax = np.max(rejind)
            reject[:rejectmax] = True
        pvals_corrected_raw = np.arange(ntests, 0, -1) * pvals
        pvals_corrected = np.minimum.accumulate(
            pvals_corrected_raw[::-1])[::-1]
        del pvals_corrected_raw

    elif method.lower() in ['ho', 'hommel']:
        # we need a copy because we overwrite it in a loop
        a = pvals.copy()
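        # closed testing with Simes tests: for each subset size m, cim is the
        # smallest Simes-adjusted p-value over the m largest p-values, and a[]
        # is raised to these lower bounds, yielding Hommel-adjusted p-values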
        for m in range(ntests, 1, -1):
            cim = np.min(m * pvals[-m:] / np.arange(1, m + 1.))
            a[-m:] = np.maximum(a[-m:], cim)
            a[:-m] = np.maximum(a[:-m], np.minimum(m * pvals[:-m], cim))
        pvals_corrected = a
        reject = a <= alphaf

    elif method.lower() in ['fdr_bh', 'fdr_i', 'fdr_p', 'fdri', 'fdrp']:
        # delegate, call with sorted pvals
        reject, pvals_corrected = fdrcorrection(pvals,
                                                alpha=alpha,
                                                method='indep',
                                                is_sorted=True)
    elif method.lower() in ['fdr_by', 'fdr_n', 'fdr_c', 'fdrn', 'fdrcorr']:
        # delegate, call with sorted pvals
        reject, pvals_corrected = fdrcorrection(pvals,
                                                alpha=alpha,
                                                method='n',
                                                is_sorted=True)
    elif method.lower() in ['fdr_tsbky', 'fdr_2sbky', 'fdr_twostage']:
        # delegate, call with sorted pvals
        reject, pvals_corrected = fdrcorrection_twostage(pvals,
                                                         alpha=alpha,
                                                         method='bky',
                                                         is_sorted=True)[:2]
    elif method.lower() in ['fdr_tsbh', 'fdr_2sbh']:
        # delegate, call with sorted pvals
        reject, pvals_corrected = fdrcorrection_twostage(pvals,
                                                         alpha=alpha,
                                                         method='bh',
                                                         is_sorted=True)[:2]

    elif method.lower() in ['fdr_gbs']:
        # adaptive step-down procedure of Gavrilov, Benjamini, Sarkar, Annals of Statistics 2009
        ##        notreject = pvals > alphaf / np.arange(ntests, 0, -1) #alphacSidak
        ##        notrejectmin = np.min(np.nonzero(notreject))
        ##        notreject[notrejectmin:] = True
        ##        reject = ~notreject

        ii = np.arange(1, ntests + 1)
        q = (ntests + 1. - ii) / ii * pvals / (1. - pvals)
        pvals_corrected_raw = np.maximum.accumulate(q)  # enforce the non-decreasing (step-up) requirement

        pvals_corrected = np.minimum.accumulate(
            pvals_corrected_raw[::-1])[::-1]
        del pvals_corrected_raw
        reject = pvals_corrected <= alpha

    else:
        raise ValueError('method not recognized')

    if pvals_corrected is not None:  # the check is not strictly necessary anymore
        pvals_corrected[pvals_corrected > 1] = 1
    if is_sorted or returnsorted:
        return reject, pvals_corrected, alphacSidak, alphacBonf
    else:
        pvals_corrected_ = np.empty_like(pvals_corrected)
        pvals_corrected_[sortind] = pvals_corrected
        del pvals_corrected
        reject_ = np.empty_like(reject)
        reject_[sortind] = reject
        return reject_, pvals_corrected_, alphacSidak, alphacBonf
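A short usage sketch for `multipletests`, again assuming the `statsmodels.stats.multitest` version of the function shown above; the p-values are invented for illustration.

import numpy as np
from statsmodels.stats.multitest import multipletests  # assumed source of the function above

# hypothetical raw p-values from five tests
pvals = [0.005, 0.01, 0.03, 0.04, 0.55]

# Holm-Sidak step-down correction (the default method='hs')
reject, pvals_corr, alphac_sidak, alphac_bonf = multipletests(pvals, alpha=0.05, method='hs')
print(reject, pvals_corr)

# the same call with Benjamini/Hochberg FDR control
reject_bh, pvals_bh, _, _ = multipletests(pvals, alpha=0.05, method='fdr_bh')
print(reject_bh, pvals_bh)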