Example #1
def filter_phase(log, pipeline, f_min, f_max, fd, pd, n_stop=0):
    transitions = aer_pipeline_transitions1_all(log, pipeline)
    stream = aer_filtered_cutoff(transitions, f_min, f_max)
    
    P = np.zeros((fd, pd))
    frequencies = np.linspace(f_min, f_max, fd)
    phases = np.linspace(0, 1, pd)
    
    count = 0
    for ae in stream:
        f = ae['frequency']
        t = ae['timestamp']
        delta = 1 / f
        s = t / delta
        phase = s - np.floor(s)
        i = np.digitize([f], frequencies) - 1
        j = np.digitize([phase], phases) - 1
        P[i, j] += 1
    
        count += 1
        if n_stop != 0 and count >= n_stop:
            break
            
    stats = {}
    stats['P'] = P
    return stats
Example #2
def bin2d(x, y, xbins=10, ybins=10):
    """
    2-dimensional binning of x, y
    Works as a 2-D extension of numpy.digitize but also automatically sets up
    the bin edges
    
    Parameters
    ----------
    
    x, y : array-like
        x, y values to bin according to
    xbins, ybins : int OR list/array like
        Either the number of bins or the binedges to use
    
    Returns
    -------
    
    ind : list of arrays
        The x,y bin indices each entry belongs to.  ind[0][i] gives the x-bin
        of the ith entry.  ind[1][i] gives the y-bin of the ith entry
    xedges, yedges: arrays
        Bin edges used
    """

    xedges = setupbins(x, xbins)
    yedges = setupbins(y, ybins)

    xind = np.digitize(x, xedges) - 1
    yind = np.digitize(y, yedges) - 1
    ind = [xind, yind]
    return ind, xedges, yedges
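A minimal usage sketch for bin2d, assuming numpy is imported as np; setupbins is not shown above, so a plausible stand-in is defined here purely for illustration:

import numpy as np

def setupbins(vals, bins):
    # hypothetical stand-in for the setupbins helper used by bin2d:
    # an int means "this many bins", otherwise treat bins as explicit edges
    if np.isscalar(bins):
        return np.linspace(np.min(vals), np.max(vals), bins + 1)
    return np.asarray(bins)

x = np.random.uniform(0, 10, 1000)
y = np.random.uniform(0, 5, 1000)
ind, xedges, yedges = bin2d(x, y, xbins=10, ybins=5)
print(ind[0][:5], ind[1][:5])  # x-bin and y-bin indices of the first five points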
Example #3
def hist(x, y, numbins, ion):
    nbins = numbins  # the body below uses 'nbins'; xlims and ylims are assumed to come from module-level globals
    # Make the bins
    xbins = np.linspace(xlims[0], xlims[1], nbins+1)
    ybins = np.linspace(ylims[0], ylims[1], nbins+1)

    # Determine what cells go in what bins
    xdig = np.digitize(x, xbins)
    ydig = np.digitize(y, ybins)

    # Fix the edge effects
    maxBinNum = len(xbins)
    for i in range(len(xdig)):
        if xdig[i]==maxBinNum:
            xdig[i] -= 1
        if ydig[i]==maxBinNum:
            ydig[i] -= 1
    
    # Create empty array
    h = np.zeros((nbins, nbins))

    # Loop through array
    for i in range(nbins):
        for j in range(nbins):
            # Find the indices where x and y belong to this bin
            bits = np.bitwise_and( xdig==i+1, ydig==j+1)
            if True in bits:
                h[i,j] = np.log10( np.sum( bits ) )

    h = np.rot90(h)
    h = np.flipud(h)
    return h, xbins, ybins
Example #4
def group(angle, wind, bsp, speedbins, anglebins, fct=np.median):
    '''Group data in bins according to wind angle and wind speed.

    Parameters
    ----------
    angle : np.ndarray
        Wind angles in degrees
    wind : np.ndarray
        wind speed in kn
    bsp : np.ndarray
        Boat speed in kn
    speedbins : ndarray
        bin boundaries for speed binning
    anglebins : ndarray
        bin boundaries for angle binning.
        Make sure that 180. is included in last bin and not on the boundary.
    fct : function
        Given all bsp values in one (speedbin, anglebin) cell, select one value to
        be used. Common examples are np.median or np.mean.

    Returns
    -------
    polar : ndarray([len(speedbins)+1, len(anglebins)])
        This contains the data array with one speed for each (speedbin, anglebin)
    '''
    if (angle.shape != wind.shape) or (angle.shape != bsp.shape):
        raise ValueError('angle, wind and bsp must have same number of elements')

    digspeed = np.digitize(wind, speedbins)
    digangle = np.digitize(np.abs(angle), anglebins)
    polar = np.zeros([len(speedbins)+1, len(anglebins)])
    for i in np.arange(1, len(speedbins)+1):
        for j in np.arange(1, len(anglebins)):
            polar[i, j] = fct(bsp[(digspeed == i) & (digangle == j)])
    return polar
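A hedged usage sketch for group with synthetic data; the bin arrays below are illustrative choices, not the ones used in the original project:

import numpy as np

angle = np.random.uniform(-180, 180, 500)          # wind angles in degrees
wind = np.random.uniform(0, 25, 500)               # wind speed in kn
bsp = 0.3 * wind + np.random.normal(0, 0.5, 500)   # boat speed in kn

speedbins = np.arange(0, 25, 5)
anglebins = np.linspace(0, 180.001, 13)            # keep 180. inside the last bin, not on the boundary
polar = group(angle, wind, bsp, speedbins, anglebins, fct=np.median)
print(polar.shape)                                 # (len(speedbins)+1, len(anglebins))

Cells that receive no samples come back as nan, since np.median of an empty selection is nan.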
Example #5
def means2idxarrays(g1, g2, i_bins, c_bins, difference):
    '''take two arrays of values and return the initial values
    and differences as numpy digitised arrays'''

    if difference == "relative":
        # calculate difference between mean values for group1 and group2
        # g1 and g2 always the same length
        change = [g2[x] - g1[x] for x in range(0, len(g1))]
        initial = g1

    elif difference == "logfold":
        change = [np.log2((g2[x] + 1.0) / (g1[x] + 1.0))
                  for x in range(0, len(g1))]
        initial = [np.log2(g1[x] + 1.0) for x in range(0, len(g1))]

    elif difference == "abs_logfold":
        change = [abs(np.log2((g2[x] + 1.0) / (g1[x] + 1.0)))
                  for x in range(0, len(g1))]
        initial = [max(np.log2(g1[x] + 1.0), np.log2(g2[x] + 1.0))
                   for x in range(0, len(g1))]

    # return arrays of len(change) with the index position in c_bins
    # corresponding to the bin in which the value of change falls
    change_idx = np.digitize(change, c_bins, right=True)
    initial_idx = np.digitize(initial, i_bins, right=True)

    return(change_idx, initial_idx)
Example #6
def better2D_desisty_plot(xdat, ydat, thresh=3, bins=(100, 100)):
    xyrange = [[min(xdat), max(xdat)], [min(ydat), max(ydat)]]
    distortion = (xyrange[1][1] - xyrange[1][0]) / \
        (xyrange[0][1] - xyrange[0][0])
    xdat = xdat * distortion

    xyrange = [[min(xdat), max(xdat)], [min(ydat), max(ydat)]]
    hh, locx, locy = histogram2d(xdat, ydat, range=xyrange, bins=bins)
    posx = np.digitize(xdat, locx)
    posy = np.digitize(ydat, locy)

    ind = (posx > 0) & (posx <= bins[0]) & (posy > 0) & (posy <= bins[1])
    # values of the histogram where the points are
    hhsub = hh[posx[ind] - 1, posy[ind] - 1]
    xdat1 = xdat[ind][hhsub < thresh]  # low density points
    ydat1 = ydat[ind][hhsub < thresh]
    hh[hh < thresh] = np.nan  # fill the areas with low density by NaNs

    plt.imshow(
        np.flipud(
            hh.T),
        cmap='jet',
        extent=np.array(xyrange).flatten(),
        interpolation='none')
    plt.plot(xdat1, ydat1, '.')
Example #7
def pos2Grid(x,y,data,xbins=None,ybins=None):
    '''Make a pixellated grid image from a 1d array
        of positions x,y,d. No smoothing, just binning.'''

    if xbins is None:
        xbins = np.arange(x.min(), x.max()+1)
    if ybins is None:
        ybins = np.arange(y.min(), y.max()+1)

    xd = np.digitize(x, xbins)
    xd -= 1

    yd = np.digitize(y, ybins)
    yd -= 1

    (w,) = np.where((xd != 0) & (yd != 0))

    xd = xd[w]
    yd = yd[w]
    data = data[w]
    xi,yi = np.array(np.meshgrid(xbins,ybins,indexing='ij'))
    zi = xi*0
    zi[xd,yd] = data
    return zi,xd,yd
Example #8
def do(lon, lat, station):
    """ Process this station and geography """
    idx = np.digitize([lon, ], lons)[0]
    jdx = np.digitize([lat, ], lats)[0]
    print("--> Processing %s i:%s j:%s" % (station, idx, jdx))

    pdata = pr_nc.variables['pr'][:, jdx, idx]
    xdata = tasmax_nc.variables['tmax'][:, jdx, idx]
    ndata = tasmin_nc.variables['tmin'][:, jdx, idx]

    highs = temperature(xdata, 'C').value('F')
    lows = temperature(ndata, 'C').value('F')
    precips = distance(pdata, 'MM').value('IN')

    now = basets
    high = low = precip = None
    for k, _ in enumerate(tmdata):
        now += datetime.timedelta(days=1)
        if now.month == 2 and now.day == 29:
            # Insert missing data
            insert(station, now, high, low, precip)
            now += datetime.timedelta(days=1)
        high = fix(highs[k])
        low = fix(lows[k])
        if low is not None and high is not None and low > high:
            # Swap, sigh
            print(('%s %s high: %.1f low: %.1f was swapped'
                   ) % (now.strftime("%m-%d-%Y"), station, high, low))
            high2 = high
            high = low
            low = high2
        precip = fix(precips[k])
        insert(station, now, high, low, precip)
Example #9
def digitize(x, bins, right=False):
    if np.isscalar(x):
        return np.digitize(np.atleast_1d(x), bins, right)[0]
    elif x.ndim != 1:
        raise NotImplementedError("digitize for pre 1.10 numpy with ndim > "
                                  "1 array")
    return np.digitize(x, bins, right)
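For example, the wrapper above returns a plain value for scalar input and an array otherwise (numpy assumed as np):

import numpy as np

bins = np.array([0.0, 1.0, 2.0, 3.0])
print(digitize(1.5, bins))                    # 2, a scalar index
print(digitize(np.array([0.5, 2.5]), bins))   # array([1, 3])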
Example #10
    def get_sky_positions(self,dmag=0.2,dz=0.2):

        LRGfile = os.path.expandvars("$OM10_DIR/data/CFHTLS_LRGs.txt")
        try:
            d = np.loadtxt(LRGfile)
        except:
            raise IOError("ERROR: cannot find LRG catalog for sky positions!")

        if vb: print("om10.DB: read in LRG sky position data from ", LRGfile)

        # Put LRG parameters in LRG structure:

        self.LRGs = {}
        self.LRGs['RA']       = np.array(d[:, 0])
        self.LRGs['DEC']      = np.array(d[:, 1])
        self.LRGs['redshift'] = np.array(d[:, 2])
        self.LRGs['mag_i']    = np.array(d[:, 6])

        print "Mean LRG RA,DEC,z,i = ",np.average(self.LRGs['RA']),np.average(self.LRGs['DEC']),np.average(self.LRGs['redshift']),np.average(self.LRGs['mag_i']);

        # Bin LRGs in mag_i and redshift, and record bin numbers for each one:

        imin,imax = np.min(self.LRGs['mag_i']),np.max(self.LRGs['mag_i'])
        nibins = int((imax - imin)/dmag) + 1
        ibins = np.linspace(imin, imax, nibins)
        self.LRGs['ivals'] = np.digitize(self.LRGs['mag_i'],ibins)
        self.LRGs['ibins'] = ibins

        zmin,zmax = np.min(self.LRGs['redshift']),np.max(self.LRGs['redshift'])
        nzbins = int((zmax - zmin)/dz) + 1
        zbins = np.linspace(zmin, zmax, nzbins)
        self.LRGs['zvals'] = np.digitize(self.LRGs['redshift'],zbins)
        self.LRGs['zbins'] = zbins

        if vb: print "om10.DB: number of LRGs stored = ",len(self.LRGs['redshift'])

        return
Example #11
def scatter_density(x, y, xlabel=None, ylabel=None, title=None, xlims=None,
                    ylims=None, filename=None):
    plt.figure()
    plt.grid()
    hist, xedges, yedges = np.histogram2d(x, y)
    xidx = np.clip(np.digitize(x, xedges), 0, hist.shape[0] - 1)
    yidx = np.clip(np.digitize(y, yedges), 0, hist.shape[1] - 1)
    c = hist[xidx, yidx]
    print "starting to plot the scatter plot"
    plt.scatter(x, y, c=c)

    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        plt.ylabel(ylabel)
    if title:
        plt.title(title)
    if xlims:
        plt.xlim(xlims)
    if ylims:
        plt.ylim(ylims)

    if filename:
        plt.savefig(filename)
    else:
        plt.show()
Example #12
def make_image(stream, bins=(100,100), range=[[-5,5],[-5,5]], nevents=100 ):
    """ Generates an image every `nevents` events """
    image = np.zeros( shape=bins )
    binX = np.linspace( range[0][0], range[0][1], bins[0] )
    binY = np.linspace( range[1][0], range[1][1], bins[1] )
    count = 0
    xpoints = list()
    ypoints = list()
    
    for data in stream:
        detx = data.DETX[0]
        dety = data.DETY[0]

        # accumulate points for efficiency:
        if (detx > range[0][0] and detx < range[0][1] 
            and dety > range[1][0] and dety < range[1][1] ):
            xpoints.append( detx )
            ypoints.append( dety )

        count += 1

        # generate a binned image from the accumulated points:
        if count >= nevents:
            if len(xpoints) > 0:
                ii = np.digitize( xpoints, binX )
                jj = np.digitize( ypoints, binY )
                image[ii,jj] += 1
            yield image.copy() # output the image
            # clear image and data points
            count =0
            image[:] = 0
            xpoints = list()
            ypoints = list()
Example #13
    def _bin_descriptors(self, siftgeo, pca, grid, dimensions, duration):
        """ Groups the points in different bins using the gridding specified
        by grid. The returned result is a dictionary keyed by the bin
        number on each of the three dimensions x, y and t.

        """
        W, H = dimensions
        t_init, t_final = duration
        # Create equally spaced bins.
        bins_x = linspace(0, W + 1, grid[0] + 1)
        bins_y = linspace(0, H + 1, grid[1] + 1)
        bins_t = linspace(t_init, t_final + 1, grid[2] + 1)
        bag_xx = defaultdict(list)
        bag_ll = defaultdict(list)
        N = 0
        for ss in siftgeo:
            xx = pca.transform(ss[1])
            N += 1
            id_x = digitize([ss[0]['x']], bins_x)
            id_y = digitize([ss[0]['y']], bins_y)
            id_t = digitize([ss[0]['t']], bins_t)
            bag_xx[(id_x[0], id_y[0], id_t[0])].append(xx)
            bag_ll[(id_x[0], id_y[0], id_t[0])].append([ss[0]['x'] / W, ss[0]['y'] / H, (ss[0]['t'] - t_init) / (t_final + 1 - t_init)])
            assert (1 <= id_x <= grid[0] and
                    1 <= id_y <= grid[1] and
                    1 <= id_t <= grid[2])
        return bag_xx, bag_ll
Example #14
def hist2d(ax, xdat, ydat, xyrange, bins, thresh=2, cmap=plt.cm.Greys, log=False, scatterother=False):
    import scipy

    tt = ax.get_aspect()

    # histogram the data
    hh, locx, locy = scipy.histogram2d(xdat, ydat, range=xyrange, bins=bins)
    mhh = np.mean(hh)
    shh = np.std(hh)
    if log:
        lhh = np.log10(hh)
    else:
        lhh = hh
    posx = np.digitize(xdat, locx)
    posy = np.digitize(ydat, locy)


    #select points within the histogram
    ind = (posx > 0) & (posx <= bins[0]) & (posy > 0) & (posy <= bins[1])
    hhsub = hh[posx[ind] - 1, posy[ind] - 1] # values of the histogram where the points are
    xdat1 = xdat[ind][hhsub < thresh] # low density points
    ydat1 = ydat[ind][hhsub < thresh]
    lhh[hh  < thresh] = np.nan # fill the areas with low density by NaNs

    ar = (0.6/0.65)*(np.diff(xyrange[0])/np.diff(xyrange[1]))[0]
    c = ax.imshow(np.flipud(lhh.T),extent=np.array(xyrange).flatten(), interpolation='none', cmap=cmap, aspect=ar)  
    
    ax.set_aspect(tt)
    
    if scatterother:
        ax.plot(xdat1, ydat1, 'k,')    
    
    
    return c
Example #15
def vertical_length_distribution(src_alt, simplex_alt, simplex_lengths, 
        alt_bins, norm=True):
    """ given input altitudes and lengths in km, create vertical
        profiles of source counts and total length.
        
        Returns alt_bins, bin_total_src, bin_total_length
        
        If norm==True, divide the counts by the bin width, returning
        km, counts/km and km/km. Otherwise just return km, counts and km.
        """
        
    # Not sure why we're not using histogram here, so that's a TODO
    # d_alt = 0.5
    d_alt = alt_bins[1:]-alt_bins[:-1]
    # alt_bins = np.arange(0.0,max_alt+d_alt, d_alt)
    bin_total_length = np.zeros(alt_bins.shape[0]-1, dtype=float)
    bin_total_src = np.zeros(alt_bins.shape[0]-1, dtype=float)
    # bin_total_length_sq = np.zeros(alt_bins.shape[0]-1, dtype=float)
    tri_bin_idx = np.digitize(simplex_alt, alt_bins)
    src_bin_idx = np.digitize(src_alt,alt_bins)
    tri_bin_idx[tri_bin_idx>(bin_total_length.shape[0]-1)]=bin_total_length.shape[0]-1
    src_bin_idx[src_bin_idx>(bin_total_src.shape[0]-1)]=bin_total_src.shape[0]-1

    for idx in src_bin_idx:
        bin_total_src[idx] += 1

    for lw,idx in zip(simplex_lengths,tri_bin_idx):
        bin_total_length[idx]+=lw
        # bin_total_length_sq[idx] += lw*lw
    # bin_total_length[tri_bin_idx] += length_weighted
    if norm==True:
        return alt_bins, bin_total_src/d_alt, bin_total_length/d_alt
    else:
        return alt_bins, bin_total_src, bin_total_length
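A small self-contained sketch of calling the profile function above with synthetic altitudes in km (assumed values, not real data):

import numpy as np

alt_bins = np.arange(0.0, 16.0, 1.0)                 # 0-15 km in 1 km steps
src_alt = np.random.uniform(2.0, 12.0, 200)          # source altitudes
simplex_alt = np.random.uniform(2.0, 12.0, 150)      # simplex altitudes
simplex_lengths = np.random.uniform(0.1, 2.0, 150)   # lengths in km

bins, src_per_km, length_per_km = vertical_length_distribution(
    src_alt, simplex_alt, simplex_lengths, alt_bins, norm=True)
print(src_per_km.sum(), length_per_km.sum())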
Example #16
def runningStatistic(x, y, statistic='mean', binNumber=10, **kwargs):
    """ Calculates the value given by statistic in bins of x. Useful for
    plotting a running mean value for a scatter plot, for example. This
    function allows the computation of the sum, mean, median, std, or other
    statistic of the values within each bin.

    NOTE: if the statistic is a callable function and there are empty data bins
    those bins will be skipped to keep the function from falling over.

    @type x: numpy array
    @param x: data over which the bins are calculated
    @type y: numpy array
    @param y: values for corresponding x values
    @type statistic: string or function
    @param statistic: The statistic to compute (default is 'mean'). Acceptable
    values are 'mean', 'median', 'sum', 'std', or a callable function. Extra
    arguments are passed as kwargs.
    @type binNumber: int
    @param binNumber: The desired number of bins for the x data.
    @rtype: tuple
    @return: A tuple of two lists containing the left bin edges and the value
    of the statistic in each of the bins.

    """

    if type(statistic) == str:
        if statistic not in ['mean', 'median', 'sum', 'std']:
            raise ValueError('unrecognized statistic "%s"' % statistic)
    elif isinstance(statistic, collections.Callable):
        pass
    else:
        raise ValueError("statistic not understood")

    if not isinstance(x, numpy.ndarray):
        x = numpy.asarray(x)
    if not isinstance(y, numpy.ndarray):
        y = numpy.asarray(y)

    try:
        bins = numpy.linspace(x.min(), x.max(), binNumber)
        centers = (bins[:-1] + bins[1:]) / 2.
        index = numpy.digitize(x, bins)
    except TypeError:
        bins = binNumber
        centers = (bins[:-1] + bins[1:]) / 2.
        index = numpy.digitize(x, binNumber)
        binNumber = len(binNumber)

    if statistic == 'mean':
        running = [numpy.mean(y[index == k]) for k in range(1, binNumber)]
    elif statistic == 'median':
        running = [numpy.median(y[index == k]) for k in range(1, binNumber)]
    elif statistic == 'sum':
        running = [numpy.sum(y[index == k]) for k in range(1, binNumber)]
    elif statistic == 'std':
        running = [numpy.std(y[index == k]) for k in range(1, binNumber)]
    elif isinstance(statistic, collections.Callable):
        running = [statistic(y[index == k], **kwargs)
                   for k in range(1, binNumber) if not len(y[index == k]) == 0]
    return centers, running
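A brief usage sketch (the function body above uses the plain numpy import name):

import numpy

x = numpy.random.uniform(0, 10, 1000)
y = 2.0 * x + numpy.random.normal(0, 1, 1000)
centers, running = runningStatistic(x, y, statistic='median', binNumber=20)
print(list(zip(centers[:3], running[:3])))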
Example #17
def plot_2Dhist_medians(x, y, z, xlabel=None, ylabel=None, cblabel=None, ranges=[[-0.007, 0.002],[-0.014, 0.005]], vmin=0.0, vmax=10.0,
                filename=None):
    
    xedges = np.linspace(ranges[0][0], ranges[0][1], 51) # these numbers chosen to get 50 bins in final plot
    yedges = np.linspace(ranges[1][0], ranges[1][1], 51)

    xbins = np.digitize(x, xedges) # values falling below min(xedges) assigned 0; values above max(xedges) assigned 51
    ybins = np.digitize(y, yedges)
    
    medians = np.zeros((50,50))
    for i in range(50):
        for j in range(50):
            medians[i,j] = np.nanmedian(z[(xbins == i+1) * (ybins == j+1)])

    fig, ax = plt.subplots(figsize=(6.5, 5))
    plt.gcf().subplots_adjust(bottom=0.15)

    plt.imshow(medians.T, origin='lower', aspect='auto',
               interpolation='nearest', cmap=plt.cm.viridis, vmin=vmin, vmax=vmax,
               extent=(ranges[0][0], ranges[0][1], ranges[1][0], ranges[1][1]))

    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        plt.ylabel(ylabel)
    cb = plt.colorbar()
    if cblabel:
        cb.set_label(cblabel)

    plt.draw()
    plt.tight_layout()

    if filename:
        plt.savefig(filename)
Example #18
def shuffle_centrals(mock, centrals, bins, shuffle_props, mock_prop='mvir', use_log=True):
    """
    Shuffle central galaxies amongst haloes.  
    Only one central per halo is allowed.
    """
    
    shuffled_mock = np.copy(mock)
    
    central_inds = np.where(centrals==True)[0]
    
    if use_log==True:
        inds = np.digitize(np.log10(mock[mock_prop][centrals]), bins=bins)
    else:
        inds = np.digitize(mock[mock_prop][centrals], bins=bins)
    
    
    for i in range(0,len(bins)-1):
        
        inds_in_bin = (inds==i+1)
        inds_in_bin = central_inds[inds_in_bin]
        
        shufled_inds_in_bin = np.random.permutation(inds_in_bin)
        
        for prop in shuffle_props:
            shuffled_mock[prop][shufled_inds_in_bin] = mock[prop][inds_in_bin]

    return shuffled_mock
Example #19
def binder(positions, orientations, bl, m=4, method='ball', margin=0):
    """ Calculate the binder cumulant for a frame, given positions and orientations.

        bl: the binder length scale, such that
            B(bl) = 1 - .333 * S4 / S2^2
        where SN are <phibl^N> averaged over each block/cluster of size bl in frame.
    """
    if margin:
        if margin < ss:
            margin *= ss
        center = 0.5*(positions.max(0) + positions.min(0))
        dmask = d < d.max() - margin
        positions = positions[dmask]
        orientations = orientations[dmask]
    if 'neigh' in method or 'ball' in method:
        tree = cKDTree(positions)
        balls = tree.query_ball_tree(tree, bl)
        balls, ball_mask = helpy.pad_uneven(balls, 0, True, int)
        ball_orient = orientations[balls]
        ball_orient[~ball_mask] = np.nan
        phis = np.nanmean(np.exp(m*ball_orient*1j), 1)
        phi2 = np.dot(phis, phis) / len(phis)
        phiphi = phis*phis
        phi4 = np.dot(phiphi, phiphi) / len(phiphi)
        return 1 - phi4 / (3*phi2*phi2)
    else:
        raise ValueError, "method {} not implemented".format(method)
    #elif method=='block':
        left, right, bottom, top = (positions[:,0].min(), positions[:,0].max(),
                                    positions[:,1].min(), positions[:,1].max())
        xbins, ybins = np.arange(left, right + bl, bl), np.arange(bottom, top + bl, bl)
        blocks = np.rollaxis(np.indices((xbins.size, ybins.size)), 0, 3)
        block_ind = np.column_stack([
                     np.digitize(positions[:,0], xbins),
                     np.digitize(positions[:,1], ybins)])
Example #20
def relPolarCoordAverageMap(relPolMeanPlt, distEdges, angleEdges, valuesToMap, objDistance, gamma, colorMap, useMean,
                            maxValue, xlab, ylab):

    # bin valuesToMap by objectDistance value
    digitizedDist = np.digitize(objDistance, distEdges)

    # bin valuesToMap by objectDistance value
    digitizedAngle = np.digitize(gamma, angleEdges)

    meanVals = 1.0*np.zeros((len(angleEdges), len(distEdges)))

    for distBin in range(1, 1+len(distEdges)):
        for angleBin in range(1, 1+len(angleEdges)):
            sltPts = np.logical_and(digitizedDist == distBin, digitizedAngle == angleBin)
            if sum(sltPts) > 0:
                if useMean:
                    meanVals[angleBin-1, distBin-1, ] = np.mean(valuesToMap[sltPts])
                else:
                    # use median
                    meanVals[angleBin-1, distBin-1, ] = np.median(valuesToMap[sltPts])

    pc = relPolMeanPlt.pcolormesh(distEdges, angleEdges, meanVals, cmap=colorMap, vmin=-maxValue, vmax=maxValue)
    relPolMeanPlt.set_xlim(min(distEdges), max(distEdges))
    relPolMeanPlt.set_ylim(min(angleEdges), max(angleEdges))
    relPolMeanPlt.set_xlabel(xlab)
    relPolMeanPlt.set_ylabel(ylab)

    return relPolMeanPlt, meanVals, pc
Example #21
 def indices(self,xyz) :
   """
   Returns the grid coordinates for a set of Cartesian coordinates
   """
   xidx = np.digitize(xyz[:,0],self.edgesx)
   yidx = np.digitize(xyz[:,1],self.edgesy)
   return np.array([xidx,yidx])
Example #22
    def __getitem__(self, key):
        """
        Implements slicing or indexing of the Histogram
        """
        if key == (): return self # May no longer be necessary
        if isinstance(key, tuple) and len(key) > self.ndims:
            raise Exception("Slice must match number of key dimensions.")

        centers = [(float(l)+r)/2 for (l,r) in zip(self.edges, self.edges[1:])]
        if isinstance(key, slice):
            start, stop = key.start, key.stop
            if [start, stop] == [None,None]: return self
            start_idx, stop_idx = None,None
            if start is not None:
                start_idx = np.digitize([start], centers, right=True)[0]
            if stop is not None:
                stop_idx = np.digitize([stop], centers, right=True)[0]

            slice_end = stop_idx+1 if stop_idx is not None else None
            slice_values = self.values[start_idx:stop_idx]
            slice_edges =  self.edges[start_idx: slice_end]

            extents = (min(slice_edges), self.extents[1],
                       max(slice_edges), self.extents[3])
            return self.clone((slice_values, slice_edges), extents=extents)
        else:
            if not (self.edges.min() <= key < self.edges.max()):
                raise Exception("Key value %s is out of the histogram bounds" % key)
            idx = np.digitize([key], self.edges)[0]
            return self.values[idx-1 if idx>0 else idx]
Example #23
def take2D(histogram, x, y, bins_x, bins_y):
    """
    Take the value from a two-dimensional histogram from the bin corresponding to (x, y).

    Parameters:
    -----------
    histogram : The values in the histogram (n,m) (ADW: is this ordering right?)
    x : the x-value to take from the hist
    y : the y-value to take from the hist
    bins_x : the xbin edges, including upper edge (n-dim)
    bins_y : the ybin edges, including upper edge (m-dim)
    """
    histogram = np.array(histogram)
    
    if np.isscalar(x):
        x = [x]
    if np.isscalar(y):
        y = [y]

    bins_x[-1] += 1.e-10 * (bins_x[-1] - bins_x[-2]) # Numerical stability
    bins_y[-1] += 1.e-10 * (bins_y[-1] - bins_y[-2])

    #return np.take(histogram, (histogram.shape[1] * (np.digitize(y, bins_y) - 1)) + (np.digitize(x, bins_x) - 1))

    # Return np.nan for entries which are outside the binning range on either axis
    index = (histogram.shape[1] * (np.digitize(y, bins_y) - 1)) + (np.digitize(x, bins_x) - 1)
    index_clipped = np.clip(index, 0, (histogram.shape[0] * histogram.shape[1]) - 1)
    val = np.take(histogram, index_clipped)

    outlier_x = np.logical_or(x < bins_x[0], x > bins_x[-1])
    outlier_y = np.logical_or(y < bins_y[0], y > bins_y[-1])
    outlier = np.logical_or(outlier_x, outlier_y)
    val[outlier] = np.nan

    return val 
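A minimal sketch of take2D in use, with a small histogram built via numpy (np alias assumed):

import numpy as np

vals = np.random.normal(size=(1000, 2))
counts, bx, by = np.histogram2d(vals[:, 0], vals[:, 1], bins=(5, 4))
# look up the histogram value at two (x, y) points; the second x is outside the range -> nan
print(take2D(counts, [0.0, 10.0], [0.0, 0.0], bx, by))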
Example #24
def _get_rejrej_array(flat_eff, flat_x, flat_y, x_range=None, y_range=None):
    indices = np.nonzero((flat_eff > 0.005) & np.isfinite(flat_x) & np.isfinite(flat_y))

    used_x = np.log10(flat_x[indices])
    used_y = np.log10(flat_y[indices])
    used_eff = flat_eff[indices]

    if not x_range:
        # allow 1% safety margin on max value
        max_x = _max_noninf(used_x) * 1.0001
        min_x = np.min(used_x)
    else:
        min_x, max_x = x_range

    if not y_range:
        max_y = _max_noninf(used_y) * 1.0001
        min_y = np.min(used_y)
    else:
        min_y, max_y = y_range

    n_out_bins = 100

    x_bin_values = np.linspace(min_x, max_x, n_out_bins)
    x_bins = np.digitize(used_x, bins=x_bin_values) - 1  # no underflow

    y_bin_values = np.linspace(min_y, max_y, n_out_bins)
    y_bins = np.digitize(used_y, bins=y_bin_values) - 1  # no underflow

    make_eff_array = _loop_over_entries  # the other method seems slower

    eff_array = make_eff_array(x_bins, y_bins, used_eff, n_out_bins)

    return eff_array, (min_x, max_x), (min_y, max_y)
Example #25
    def place(self, sig, bg_x, bg_y, cut_1_range, cut_2_range):
        """
        calculates x,y,z coordinates (rej x, rej y, eff)
        
        NOTE: make sure the eff, rej_x, rej_y arrays are integrated
        """
        assert bg_x.shape == bg_y.shape
        npts_1, npts_2 = bg_x.shape

        c1_bin_bounds = np.linspace(*cut_1_range, num=(npts_1 + 1))
        c1_bin = np.digitize([self._cut_1], c1_bin_bounds) - 1

        c2_bin_bounds = np.linspace(*cut_2_range, num=(npts_2 + 1))
        c2_bin = np.digitize([self._cut_2], c2_bin_bounds) - 1

        if any(b < 0 for b in [c1_bin, c2_bin]):
            raise ValueError("can't put a cut in the underflow bin")

        eff = float(sig[c1_bin, c2_bin] / sig.max())

        def get_rej(bkg_array):
            array_val = bkg_array.max() / bkg_array[c1_bin, c2_bin]
            return float(array_val)

        rej_x, rej_y = [get_rej(ar) for ar in [bg_x, bg_y]]

        self._xyz = rej_x, rej_y, eff
        self._cut_ranges = (cut_1_range, cut_2_range)
Example #26
def sim_make_residual_images(rmcal,binX=32,binY=32):
	xBins = np.arange(0,nX+1,binX)
	yBins = np.arange(0,nY+1,binY)
	median_a_offset = 0
	dmag = []
	for i,obj in enumerate(rmcal):
		mag,err = rmcal.get_object_phot(obj)
		dmag.append(obj.refMag - (mag - median_a_offset))
	dmag = np.concatenate(dmag)
	xy = np.hstack( [ [rmcal.objs[i].xpos,rmcal.objs[i].ypos] 
	                             for i in range(rmcal.num_objects()) ] )
	# XXX hack that last index in a_indices is ccdNum
	ccds = np.concatenate( [ rmcal.objs[i].a_indices[-1]
	                             for i in range(rmcal.num_objects()) ] )
	ffmaps = []
	for ccdNum in range(4):
		ffmap = [[[] for xi in xBins] for yi in yBins]
		ii = np.where(ccds==ccdNum)[0]
		for xi,yi,dm in zip(np.digitize(xy[0,ii],xBins),
		                    np.digitize(xy[1,ii],yBins),
		                    dmag[ii]):
			ffmap[yi][xi].append(dm)
		for xi in range(len(xBins)):
			for yi in range(len(yBins)):
				if len(ffmap[yi][xi])==0:
					ffmap[yi][xi] = np.nan
				else:
					ffmap[yi][xi] = np.median(ffmap[yi][xi])
		ffmaps.append(np.array(ffmap))
	return np.array(ffmaps)
Example #27
    def updateViz(self):
        if self.gridRadiusViz == 0:
            vals = []
            for name in self.vizObjectNames:
                r = moose.element(name+self.moosepath)
                d = float(r.getField(self.variable))
                vals.append(d)
            inds = digitize(vals, self.stepVals)
            for i in range(0, len(self.vizObjects)):
                self.vizObjects[i].r, self.vizObjects[i].g, self.vizObjects[i].b = self.colorMap[inds[i]-1]

        else:
            vals = []
            vals_2 = []
            for name in self.vizObjectNames:
                r = mc.pathToId(name+self.moosepath)
                d = float(mc.getField(r, self.variable))

                r2 = mc.pathToId(name+self.moosepath_2)
                d2 = float(mc.getField(r2, self.variable_2))

                vals.append(d)
                vals_2.append(d2)

            inds = digitize(vals, self.stepVals)
            inds_2 = digitize(vals_2, self.stepVals_2)

            for i in range(0, len(self.vizObjects)):
                self.vizObjects[i].r, self.vizObjects[i].g, self.vizObjects[i].b = self.colorMap[inds[i]-1]
                self.vizObjects[i].radius = self.indRadius[inds_2[i]-1]

        self.updateGL()
Example #28
def get_line_histos(results, temp, image, axis=0, bins=None):
    """This function creates an ADU histogram per each pixel in the direction defined by the axis parameter.
    """
    if image is None:
        temp["current_entry"] += 1
        return results, temp

    if bins is None:
        bins = np.arange(-100, 1000, 5)

    for i in range(image.shape[axis]):
        if axis == 0:
            t_histo = np.bincount(np.digitize(image[i, :].flatten(), bins[1:-1]), 
                          minlength=len(bins) - 1)
        elif axis == 1:
            t_histo = np.bincount(np.digitize(image[:, i].flatten(), bins[1:-1]), 
                          minlength=len(bins) - 1)

        if temp["current_entry"] == 0 and i == 0:
            results["histo_counts_line"] = np.empty([image.shape[axis], t_histo.shape[0]], 
                                                dtype=image.dtype)
        if temp["current_entry"] == 0:           
            results["histo_counts_line"][i] = t_histo
        else:
            results["histo_counts_line"][i] += t_histo
    temp["current_entry"] += 1
    return results, temp
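A rough sketch of driving get_line_histos over a few synthetic frames; results and temp are plain dicts, and 'current_entry' is assumed to start at 0 as the function expects:

import numpy as np

results, temp = {}, {"current_entry": 0}
for _ in range(3):
    frame = np.random.randint(0, 500, size=(16, 32)).astype(np.int64)
    results, temp = get_line_histos(results, temp, frame, axis=0)
print(results["histo_counts_line"].shape)   # (16, len(bins) - 1) with the default bins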
Example #29
def Mars_Year_np(j2k_np, jday_vals, year_vals, year_length, return_length=False):
    jday_vals = np.array(jday_vals)

    year_vals = np.array(year_vals)

    year_length = np.array(year_length)

    if j2k_np < jday_vals[0]:
        return np.floor(1+(j2k_np-jday_vals[0])/year_length[0])
    elif j2k_np >= jday_vals[-1]:
        return np.floor(1+(j2k_np-jday_vals[-1])/year_length[-1])
    else:
        try:
            v=np.clip(np.digitize(j2k_np,jday_vals),1,jday_vals.size)-1
            y = year_vals[v]
            l = year_length[v]
        except:
            v=np.clip(np.digitize([j2k_np],jday_vals),1,jday_vals.size)-1
            y = year_vals[v][0]
            l = year_length[v][0]

    if return_length:
        return (y*1.0,l)
    else:
        return y*1.0
Example #30
def generate_biomes(data_path):
    if os.path.isfile(data_path + "biomes.pkl"):
        return
    
    moisture = pickle.load(open(data_path+"moisture.pkl", 'rb'))
    moisture = imresize(moisture, (IMAGE_HEIGHT, IMAGE_WIDTH))
    plt.imshow(moisture)
    plt.show()
    moisture = np.digitize(moisture, [0, 100, 170, 230, 255])-1
    moisture[moisture > 4] = 4
    plt.imshow(moisture)
    plt.show()
    temp = pickle.load(open(data_path+"temperature.pkl", 'rb'))
    temp = imresize(temp, (IMAGE_HEIGHT, IMAGE_WIDTH))
    plt.imshow(temp)
    plt.show()
    temp = np.digitize(temp, [0, 90, 130, 255])-1
    temp[temp > 2] = 2
    plt.imshow(temp)
    plt.show()

    biomes = [
        [BARE, TUNDRA, TAIGA, SNOW, OCEAN],
        [GRASSLAND, WOODLAND, TEMPERATE_FOREST, TEMPERATE_RAINFOREST, OCEAN],
        [DESERT, SAVANNAH, TROPICAL_SEASONAL_FOREST, TROPICAL_RAINFOREST, OCEAN]
        ]
    img = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH))
    for i in range(IMAGE_HEIGHT):
        for j in range(IMAGE_WIDTH):
            img[i,j] = biomes[temp[i,j]][moisture[i,j]]
    elevation = pickle.load(open(data_path+"elevation.pkl", 'rb'))
    img[elevation == 0] = OCEAN
    plt.imshow(img)
    plt.show()
    pickle.dump(img, open(data_path+"biomes.pkl", 'wb'))
Example #31
#
# Plot the distribution
#
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
for cl in classes:
    print "%s: %s"%(cl, np.mean(neighbourhoods_content[cl]))

    ## Bin the data
    cs = np.array(neighbourhoods_content[cl])
    N_bins = 11 
    bins = np.linspace(min(cs), max(cs), N_bins)
    l_bin = (max(cs)-min(cs)) / N_bins
    m = np.mean(cs)
    s = np.std(cs)
    digitized = np.digitize(cs, bins)
    cs_mean = [cs[digitized == i].mean() for i in range(1, len(bins))]
    cs_counts = [0 for i in range(len(bins))]
    for d in digitized:
        cs_counts[d-1] += 1/(len(cs)*l_bin)

    # Clean the NaN values out
    cs_clean, counts_clean = zip(* filter( lambda x: not np.isnan(x[0]),
                                    zip(cs_mean,cs_counts[1:]) ))
    ax.plot(cs_clean, counts_clean, 'k-', color=colours[cl], lw=3,
            label=r"$%s$"%cl)
    ax.set_xlabel(r'$\frac{H_\alpha^n}{H_\alpha}$', fontsize=30)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_position(('outward', 10))  # outward by 10 points
    ax.spines['bottom'].set_position(('outward', 10))  # outward by 10 points
Example #32
def _digitize_1d(X, bins, n_samples, n_timestamps):
    X_digit = np.empty((n_samples, n_timestamps))
    for i in prange(n_samples):
        X_digit[i] = np.digitize(X[i], bins, right=True)
    return X_digit
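_digitize_1d uses prange, so in its original context it is presumably compiled with numba; the sketch below simply substitutes the built-in range so the helper runs in plain Python (an assumption made only for illustration):

import numpy as np

prange = range   # stand-in for numba.prange so the helper above runs without numba

X = np.random.rand(4, 50)
bins = np.linspace(0, 1, 11)
X_digit = _digitize_1d(X, bins, n_samples=4, n_timestamps=50)
print(X_digit[0, :5])   # bin index (0..10) of the first five timestamps of sample 0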
Example #33
def main():

    args = get_parser().parse_args()

    # -- debugging option
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)

    file_time = check_neon_time()

    # --  specify site from which to extract data
    site_name = args.site_name

    # --  Look for surface data
    surf_dir = args.surf_dir
    surf_file = find_surffile(surf_dir, site_name)

    # --  directory structure
    current_dir = os.getcwd()
    parent_dir = os.path.dirname(current_dir)
    clone_dir = os.path.abspath(os.path.join(__file__, "../../.."))
    neon_dir = os.path.join(clone_dir, "neon_surffiles")

    print("Present Directory", current_dir)

    # --  download neon data if needed
    neon_file = get_neon(neon_dir, site_name)

    # -- Read neon data
    df = pd.read_csv(neon_file)

    # -- Read surface dataset files
    print("surf_file:", surf_file)
    f1 = xr.open_dataset(surf_file)

    # -- Find surface dataset soil depth information
    soil_bot, soil_top = find_soil_structure(surf_file)

    # -- Find surface dataset soil levels
    # TODO: how? NS uses metadata on file to find
    # soil strucure
    # better suggestion by WW to write dzsoi to neon surface dataset
    # This todo needs to go to the subset_data

    # TODO Will: if I sum them up , are they 3.5? (m) YES
    print("soil_top:", soil_top)
    print("soil_bot:", soil_bot)
    print("Sum of soil top depths    :", sum(soil_top))
    print("Sum of soil bottom depths :", sum(soil_bot))

    soil_top = np.cumsum(soil_top)
    soil_bot = np.cumsum(soil_bot)
    soil_mid = 0.5 * (soil_bot - soil_top) + soil_top
    # print ("Cumulative sum of soil bottom depths :", sum(soil_bot))

    obs_top = df["biogeoTopDepth"] / 100
    obs_bot = df["biogeoBottomDepth"] / 100

    # -- Mapping surface dataset and neon soil levels
    bins = df["biogeoTopDepth"] / 100
    bin_index = np.digitize(soil_mid, bins) - 1
    """
    print ("================================")
    print ("  Neon data soil structure:     ")
    print ("================================")

    print ("------------","ground","------------")
    for i in range(len(obs_bot)):
        print ("layer",i)
        print ("-------------",
                "{0:.2f}".format(obs_bot[i]),
                "-------------")

    print ("================================")
    print ("Surface data soil structure:    ")
    print ("================================")

    print ("------------","ground","------------")
    for b in range(len(bin_index)):
        print ("layer",b)
        print ("-------------",
                "{0:.2f}".format(soil_bot[b]),
                "-------------")
    """

    # -- update fields with neon
    f2 = f1
    soil_levels = f2["PCT_CLAY"].size
    for soil_lev in range(soil_levels):
        print("--------------------------")
        print("soil_lev:", soil_lev)
        print(df["clayTotal"][bin_index[soil_lev]])
        f2["PCT_CLAY"][soil_lev] = df["clayTotal"][bin_index[soil_lev]]
        f2["PCT_SAND"][soil_lev] = df["sandTotal"][bin_index[soil_lev]]

        bulk_den = df["bulkDensExclCoarseFrag"][bin_index[soil_lev]]
        carbon_tot = df["carbonTot"][bin_index[soil_lev]]
        estimated_oc = df["estimatedOC"][bin_index[soil_lev]]

        # -- estimated_oc in neon data is rounded to the nearest integer.
        # -- Check to make sure the rounded oc is not higher than carbon_tot.
        # -- Use carbon_tot if estimated_oc is bigger than carbon_tot.

        if estimated_oc > carbon_tot:
            estimated_oc = carbon_tot

        layer_depth = (df["biogeoBottomDepth"][bin_index[soil_lev]] -
                       df["biogeoTopDepth"][bin_index[soil_lev]])

        # f2["ORGANIC"][soil_lev] = estimated_oc * bulk_den / 0.58

        # -- after adding caco3 by NEON:
        # -- if caco3 exists:
        # -- inorganic = caco3/100.0869*12.0107
        # -- organic = carbon_tot - inorganic
        # -- else:
        # -- organic = estimated_oc * bulk_den / 0.58

        caco3 = df["caco3Conc"][bin_index[soil_lev]]
        inorganic = caco3 / 100.0869 * 12.0107
        print("inorganic:", inorganic)

        if not np.isnan(inorganic):
            actual_oc = carbon_tot - inorganic
        else:
            actual_oc = estimated_oc

        f2["ORGANIC"][soil_lev] = actual_oc * bulk_den / 0.58

        print("~~~~~~~~~~~~~~~~~~~~~~~~")
        print("inorganic:")
        print("~~~~~~~~~~~~~~~~~~~~~~~~")
        print(inorganic)
        print("~~~~~~~~~~~~~~~~~~~~~~~~")

        print("bin_index    : ", bin_index[soil_lev])
        print("layer_depth  : ", layer_depth)
        print("carbon_tot   : ", carbon_tot)
        print("estimated_oc : ", estimated_oc)
        print("bulk_den     : ", bulk_den)
        print("organic      :", f2["ORGANIC"][soil_lev].values)
        print("--------------------------")

    # -- Interpolate missing values
    method = "linear"
    fill_interpolate(f2, "PCT_CLAY", method)
    fill_interpolate(f2, "PCT_SAND", method)
    fill_interpolate(f2, "ORGANIC", method)

    # -- Update zbedrock if neon observation does not make it down to 2m depth
    rock_thresh = 2

    zb_flag = False

    if obs_bot.iloc[-1] < rock_thresh:
        print("zbedrock is updated.")
        f2["zbedrock"].values[:, :] = obs_bot.iloc[-1]
        zb_flag = True

    sort_print_soil_layers(obs_bot, soil_bot)

    # -- updates for ag sites : KONA and STER
    ag_sites = ["KONA", "STER"]
    if site_name in ag_sites:
        print("Updating PCT_NATVEG")
        print("Original : ", f2.PCT_NATVEG.values)
        f2.PCT_NATVEG.values = [[0.0]]
        print("Updated  : ", f2.PCT_NATVEG.values)

        print("Updating PCT_CROP")
        print("Original : ", f2.PCT_CROP.values)
        f2.PCT_CROP.values = [[100.0]]
        print("Updated  : ", f2.PCT_CROP.values)

        print("Updating PCT_NAT_PFT")
        #print (f2.PCT_NAT_PFT)
        print(f2.PCT_NAT_PFT.values[0])
        f2.PCT_NAT_PFT.values[0] = [[100.0]]
        print(f2.PCT_NAT_PFT[0].values)

    out_dir = args.out_dir

    # -- make out_dir if it does not exist
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # -- update time tag for the output file
    wfile = out_dir + update_time_tag(surf_file)

    # -- update netcdf metadata
    f2 = update_metadata(f2, surf_file, neon_file, zb_flag)

    print(f2.attrs)
    f2.to_netcdf(path=wfile, mode="w", format="NETCDF3_64BIT")

    print("Successfully updated surface data file for neon site(" + site_name +
          "):\n - " + wfile)
Example #34
def to_bin(value, bins):
    return np.digitize(x=[value], bins=bins)[0]
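For example (np assumed):

import numpy as np

bins = np.array([0, 18, 35, 65])
print(to_bin(42, bins))   # 3: 42 falls in the [35, 65) bin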
Example #35
    model = Model(inputs=inputs, outputs=proba)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    return model


with open('/home/alex/diplom/constants/buckets_info.pkl', 'rb') as f:
    mapping_seq_len_to_padded_len = pickle.load(f)

with open('/home/alex/diplom/constants/dense_features_buckets.pkl', 'rb') as f:
    dense_features_buckets = pickle.load(f)

df = pd.read_csv(sys.argv[1])

for dense_col in ['amnt', 'days_before', 'hour_diff']:
    df[dense_col] = np.digitize(df[dense_col],
                                bins=dense_features_buckets[dense_col])

seq = transform_transactions_to_sequences(df)
seq['sequence_length'] = seq.sequences.apply(lambda x: len(x[1]))
seq['product'] = 1

x = create_padded_buckets(seq,
                          mapping_seq_len_to_padded_len,
                          save_to_file_path=None,
                          has_target=False)

embedding_projections = {
    'currency': (11, 6),
    'operation_kind': (7, 5),
    'card_type': (175, 29),
    'operation_type': (22, 9),
Example #36
def get_state(observation):
	pos, vel =  observation
	pos_bin = int(np.digitize(pos, pos_space))
	vel_bin = int(np.digitize(vel, vel_space))
	return (pos_bin, vel_bin)
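get_state relies on module-level pos_space and vel_space arrays (typical for a MountainCar-style discretization); a hedged sketch with assumed bin edges:

import numpy as np

pos_space = np.linspace(-1.2, 0.6, 12)     # assumed position discretization
vel_space = np.linspace(-0.07, 0.07, 20)   # assumed velocity discretization

print(get_state((-0.5, 0.02)))             # (5, 13): the (position, velocity) bin pair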
Example #37
    def init_2D(cls,
                mesh1: Mesh2D,
                mesh2: Mesh2D,
                boundary1: ndarray,
                boundary2: ndarray,
                tangent: ndarray):
        """Create mortar mappings for two 2D meshes via projection.

        Parameters
        ----------
        mesh1
            An object of the type :class:`~skfem.mesh.mesh_2d.Mesh2D`.
        mesh2
            An object of the type :class:`~skfem.mesh.mesh_2d.Mesh2D`.
        boundary1
            A subset of facets to use from mesh1.
        boundary2
            A subset of facets to use from mesh2.
        tangent
            A tangent vector defining the direction of the projection.

        """
        from ..mesh import MeshLine
        tangent /= np.linalg.norm(tangent)

        # find unique nodes on the two boundaries
        p1_ix = np.unique(mesh1.facets[:, boundary1].flatten())
        p2_ix = np.unique(mesh2.facets[:, boundary2].flatten())
        p1 = mesh1.p[:, p1_ix]
        p2 = mesh2.p[:, p2_ix]

        def proj(p):
            """Project onto the line defined by 'tangent'."""
            return np.outer(tangent, tangent) @ p

        def param(p):
            """Calculate signed distances of projected points from origin."""
            y = proj(p)
            return np.linalg.norm(y, axis=0) * np.sign(np.dot(tangent, y))

        # find unique supermesh facets by combining nodes from both sides
        param_p1 = param(p1)
        param_p2 = param(p2)
        _, ix = np.unique(np.concatenate((param_p1, param_p2)),
                          return_index=True)
        ixorig = np.concatenate((p1_ix, p2_ix + mesh1.p.shape[1]))[ix]
        p = np.array([np.hstack((param(mesh1.p), param(mesh2.p)))])
        t = np.array([ixorig[:-1], ixorig[1:]])

        # create 1-dimensional supermesh from the intersections of the
        # projected facet elements
        p = p[:, np.concatenate((t[0], np.array([t[1, -1]])))]
        range_max = np.min([np.max(param_p1), np.max(param_p2)])
        range_min = np.max([np.min(param_p1), np.min(param_p2)])
        p = np.array([p[0, (p[0] <= range_max) * (p[0] >= range_min)]])
        t = np.array([np.arange(p.shape[1] - 1), np.arange(1, p.shape[1])])
        m_super = MeshLine(p, t)

        # helper meshes for creating the mappings
        m1 = MeshLine(np.sort(param_p1), np.array([np.arange(p1.shape[1] - 1),
                                                   np.arange(1, p1.shape[1])]))
        m2 = MeshLine(np.sort(param_p2), np.array([np.arange(p2.shape[1] - 1),
                                                   np.arange(1, p2.shape[1])]))

        # construct normals by rotating 'tangent'
        normal = np.array([tangent[1], -tangent[0]])
        normals = normal[:, None].repeat(t.shape[1], axis=1)

        # initialize mappings (for orienting)
        map_super = m_super._mapping()
        map_m1 = m1._mapping()
        map_m2 = m2._mapping()
        map_mesh1 = mesh1._mapping()
        map_mesh2 = mesh2._mapping()

        # matching of elements in the supermesh and the helper meshes
        mps = map_super.F(np.array([[.5]]))
        ix1 = np.digitize(mps[0, :, 0], m1.p[0]) - 1
        ix2 = np.digitize(mps[0, :, 0], m2.p[0]) - 1

        # for each element, map two points to global coordinates, reparametrize
        # the points, and flip corresponding helper mesh element indices if
        # sorting is wrong
        f1mps = .5 * (mesh1.p[:, mesh1.facets[0, boundary1]] +
                      mesh1.p[:, mesh1.facets[1, boundary1]])
        sort_boundary1 = np.argsort(param(f1mps))
        z1 = map_mesh1.G(map_m1.invF(map_super.F(np.array([[.25, .75]])),
                                     tind=ix1),
                         find=boundary1[sort_boundary1][ix1])
        ix1_flip = np.unique(ix1[param(z1[:, :, 1]) < param(z1[:, :, 0])])
        m1t = m1.t.copy()
        m1t[:, ix1_flip] = np.flipud(m1t[:, ix1_flip])
        m1 = replace(m1, t=m1t)

        f2mps = .5 * (mesh2.p[:, mesh2.facets[0, boundary2]] +
                      mesh2.p[:, mesh2.facets[1, boundary2]])
        sort_boundary2 = np.argsort(param(f2mps))
        z2 = map_mesh2.G(map_m2.invF(map_super.F(np.array([[.25, .75]])),
                                     tind=ix2),
                         find=boundary2[sort_boundary2][ix2])
        ix2_flip = np.unique(ix2[param(z2[:, :, 1]) < param(z2[:, :, 0])])
        m2t = m2.t.copy()
        m2t[:, ix2_flip] = np.flipud(m2t[:, ix2_flip])
        m2 = replace(m2, t=m2t)

        # construct normals by rotating 'tangent'
        normal = np.array([tangent[1], -tangent[0]])
        normals = normal[:, None].repeat(t.shape[1], axis=1)

        # initialize mappings (for orienting)
        map_super = m_super._mapping()
        map_m1 = m1._mapping()
        map_m2 = m2._mapping()
        map_mesh1 = mesh1._mapping()
        map_mesh2 = mesh2._mapping()

        # matching of elements in the supermesh and the helper meshes
        mps = map_super.F(np.array([[.5]]))
        ix1 = np.digitize(mps[0, :, 0], m1.p[0]) - 1
        ix2 = np.digitize(mps[0, :, 0], m2.p[0]) - 1

        return cls((map_mesh1, map_mesh2),
                   (boundary1[sort_boundary1][ix1],
                    boundary2[sort_boundary2][ix2]),
                   (map_m1, map_m2),
                   (ix1, ix2),
                   map_super,
                   normals)
Example #38
    def get_energy_dispersion(self, position, e_reco, migra_step=5e-3):
        """Get energy dispersion at a given position.

        Parameters
        ----------
        position : `~astropy.coordinates.SkyCoord`
            the target position. Should be a single coordinates
        e_reco : `~astropy.units.Quantity`
            Reconstructed energy axis binning
        migra_step : float
            Integration step in migration

        Returns
        -------
        edisp : `~gammapy.irf.EnergyDispersion`
            the energy dispersion (i.e. rmf object)
        """
        # TODO: reduce code duplication with EnergyDispersion2D.get_response
        if position.size != 1:
            raise ValueError(
                "EnergyDispersion can be extracted at one single position only."
            )

        # axes ordering fixed. Could be changed.
        pix_ener = np.arange(self.edisp_map.geom.axes[1].nbin)

        # Define a vector of migration with mig_step step
        mrec_min = self.edisp_map.geom.axes[0].edges[0]
        mrec_max = self.edisp_map.geom.axes[0].edges[-1]
        mig_array = np.arange(mrec_min, mrec_max, migra_step)
        pix_migra = (mig_array - mrec_min) / mrec_max * self.edisp_map.geom.axes[0].nbin

        # Convert position to pixels
        pix_lon, pix_lat = self.edisp_map.geom.to_image().coord_to_pix(position)

        # Build the pixels tuple
        pix = np.meshgrid(pix_lon, pix_lat, pix_migra, pix_ener)
        # Interpolate in the EDisp map. Squeeze to remove dimensions of length 1
        edisp_values = np.squeeze(
            self.edisp_map.interp_by_pix(pix)
            * u.Unit(self.edisp_map.unit)  # * migra_step
        )
        e_trues = self.edisp_map.geom.axes[1].center
        data = []

        for i, e_true in enumerate(e_trues):
            # We now perform integration over migra
            # The code is adapted from `~gammapy.EnergyDispersion2D.get_response`

            # migration value of e_reco bounds
            migra_e_reco = e_reco / e_true

            # Compute normalized cumulative sum to prepare integration
            tmp = np.nan_to_num(
                np.cumsum(edisp_values[:, i]) / np.sum(edisp_values[:, i])
            )

            # Determine positions (bin indices) of e_reco bounds in migration array
            pos_mig = np.digitize(migra_e_reco, mig_array) - 1
            # We ensure that no negative values are found
            pos_mig = np.maximum(pos_mig, 0)

            # We compute the difference between 2 successive bounds in e_reco
            # to get integral over reco energy bin
            integral = np.diff(tmp[pos_mig])

            data.append(integral)

        data = np.asarray(data)
        # EnergyDispersion uses edges of true energy bins
        e_true_edges = self.edisp_map.geom.axes[1].edges

        e_lo, e_hi = e_true_edges[:-1], e_true_edges[1:]
        ereco_lo, ereco_hi = (e_reco[:-1], e_reco[1:])

        return EnergyDispersion(
            e_true_lo=e_lo,
            e_true_hi=e_hi,
            e_reco_lo=ereco_lo,
            e_reco_hi=ereco_hi,
            data=data,
        )
Example #39
    def select(cls, dataset, selection_mask=None, **selection):
        if selection_mask is not None:
            raise ValueError(
                "Masked selections currently not supported for {0}.".format(
                    cls.__name__))

        dimensions = dataset.kdims
        val_dims = [vdim for vdim in dataset.vdims if vdim in selection]
        if val_dims:
            raise IndexError(
                'Cannot slice value dimensions in compressed format, '
                'convert to expanded format before slicing.')

        indexed = cls.indexed(dataset, selection)
        full_selection = [(d, selection.get(d.name, selection.get(d.label)))
                          for d in dimensions]
        data = {}
        value_select = []
        for i, (dim, ind) in enumerate(full_selection):
            irregular = cls.irregular(dataset, dim)
            values = cls.coords(dataset, dim, irregular)
            mask = cls.key_select_mask(dataset, values, ind)
            if irregular:
                if np.isscalar(ind) or isinstance(ind, (set, list)):
                    raise IndexError(
                        "Indexing not supported for irregularly "
                        "sampled data. %s value along %s dimension."
                        "must be a slice or 2D boolean mask." % (ind, dim))
                mask = mask.max(axis=i)
            elif dataset._binned:
                edges = cls.coords(dataset, dim, False, edges=True)
                inds = np.argwhere(mask)
                if np.isscalar(ind):
                    emin, emax = edges.min(), edges.max()
                    if ind < emin:
                        raise IndexError("Index %s less than lower bound "
                                         "of %s for %s dimension." %
                                         (ind, emin, dim))
                    elif ind >= emax:
                        raise IndexError(
                            "Index %s more than or equal to upper bound "
                            "of %s for %s dimension." % (ind, emax, dim))
                    idx = max([np.digitize([ind], edges)[0] - 1, 0])
                    mask = np.zeros(len(values), dtype=bool)
                    mask[idx] = True
                    values = edges[idx:idx + 2]
                elif len(inds):
                    values = edges[inds.min():inds.max() + 2]
                else:
                    values = edges[0:0]
            else:
                values = values[mask]
            values, mask = np.asarray(values), np.asarray(mask)
            value_select.append(mask)
            data[dim.name] = np.array([values
                                       ]) if np.isscalar(values) else values

        int_inds = [np.argwhere(v) for v in value_select][::-1]
        index = np.ix_(*[
            np.atleast_1d(np.squeeze(ind)) if ind.ndim > 1 else np.
            atleast_1d(ind) for ind in int_inds
        ])

        for kdim in dataset.kdims:
            if cls.irregular(dataset, dim):
                da = dask_array_module()
                if da and isinstance(dataset.data[kdim.name], da.Array):
                    data[kdim.name] = dataset.data[kdim.name].vindex[index]
                else:
                    data[kdim.name] = np.asarray(data[kdim.name])[index]

        for vdim in dataset.vdims:
            da = dask_array_module()
            if da and isinstance(dataset.data[vdim.name], da.Array):
                data[vdim.name] = dataset.data[vdim.name].vindex[index]
            else:
                data[vdim.name] = np.asarray(dataset.data[vdim.name])[index]

        if indexed:
            if len(dataset.vdims) == 1:
                da = dask_array_module()
                arr = np.squeeze(data[dataset.vdims[0].name])
                if da and isinstance(arr, da.Array):
                    arr = arr.compute()
                return arr if np.isscalar(arr) else arr[()]
            else:
                return np.array(
                    [np.squeeze(data[vd.name]) for vd in dataset.vdims])
        return data
Example #40
 def _transform(self, c):
     return np.where(np.isnan(c), np.NaN, np.digitize(c, self.points))
Example #41
def mapToCSR(mat: dict) -> Tuple[ndarray, ndarray, ndarray]:
    """
    Given a pyoptsparse matrix definition, return a tuple containing a
    map of the matrix to the CSR format.

    Parameters
    ----------
    mat : dict
       A sparse matrix representation.

    Returns
    -------
    tup : tuple of numpy arrays
        tup[0] : numpy array (size=num_rows+1)
            An array that holds the indices in col_idx and data at which each
            row begins.  The last element contains the number of nonzero
            elements in the sparse array.
        tup[1] : numpy array (size=nnz)
            An array of the column indices of each element in data.
        tup[2] : numpy array (size=nnz)
            An indexing array which maps the elements in the data array
            to elements in the CSR data array.
    """
    if "csr" in mat:
        # First handle the trivial case CSR->CSR
        row_p = mat["csr"][IROW]
        col_idx = mat["csr"][ICOL]
        idx_data = np.s_[:]
        return row_p, col_idx, idx_data

    num_rows = mat["shape"][0]
    num_cols = mat["shape"][1]

    if "csc" in mat:
        # If given a CSC matrix, expand the column pointers so we
        # effectively have a COO representation.
        csc_colp = mat["csr"][ICOL]
        rows = mat["csc"][IROW]
        nnz = csc_colp[-1]

        # Allocate the COO maps
        cols = np.zeros(nnz, dtype="intc")

        # We already have a full representation of the rows.
        # We need to decompress the representation of the columns.
        for j in range(num_cols):
            cols[csc_colp[j] : csc_colp[j + 1]] = j

    elif "coo" in mat:
        rows = mat["coo"][IROW]
        cols = mat["coo"][ICOL]
        nnz = len(rows)

    # Allocate the row pointer array
    row_p = np.zeros(num_rows + 1, dtype="intc")

    # Get the sort order that puts data in row-major form
    idx_data = np.lexsort((cols, rows))

    # Apply the row-major indexing to the COO column and row indices
    col_idx = np.asarray(cols, dtype="intc")[idx_data]
    rows_rowmaj = np.asarray(rows, dtype="intc")[idx_data]

    # Now for i = 0 to num_rows-1, row_p[i] is the first occurrence
    # of i in rows_rowmaj
    row_p[:-1] = np.digitize(np.arange(num_rows), rows_rowmaj, right=True)

    # By convention store nnz in the last element of row_p
    row_p[-1] = nnz

    return row_p, col_idx, idx_data
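# Hedged usage sketch (not from the original pyoptsparse source). It assumes
# the module constants IROW = 0 and ICOL = 1, i.e. mat["coo"] is ordered as
# [row indices, column indices, data].
import numpy as np

rows = np.array([1, 0, 0], dtype="intc")
cols = np.array([1, 2, 0], dtype="intc")
mat = {"shape": (2, 3), "coo": [rows, cols, np.array([7.0, 5.0, 3.0])]}

row_p, col_idx, idx_data = mapToCSR(mat)
# row_p    -> [0, 2, 3]   row i occupies col_idx[row_p[i]:row_p[i+1]]
# col_idx  -> [0, 2, 1]   column indices in row-major order
# idx_data -> [2, 1, 0]   reorders the COO data array into CSR order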
Beispiel #42
0
def digitizeAmplitudesMono(y, bitdepth):

    bins = np.linspace(-1, 1, 2**bitdepth+1)
    y_digitized = bins[np.digitize(y, bins) - 1]
    return y_digitized, bins
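# Hedged usage sketch (not part of the original snippet): quantise a sine wave
# to 4 bits, i.e. snap every sample onto one of 2**4 + 1 levels spanning [-1, 1].
import numpy as np

sr = 8000
t = np.arange(sr) / sr
y = 0.8 * np.sin(2 * np.pi * 440 * t)
y_q, levels = digitizeAmplitudesMono(y, bitdepth=4)
# y_q only takes values from `levels`; the quantisation step is
# levels[1] - levels[0] == 2 / 2**4 == 0.125.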
Beispiel #43
0
def feature_engineering_step1(_df):
    title_mapping = {
        'Capt': 'Mr',
        'Col': 'Mr',
        'Don': 'Mr',
        'Dr': 'Mr',
        'Jonkheer': 'Mr',
        'Lady': 'Mrs',
        'Major': 'Mr',
        'Master': 'Master',
        'Miss': 'Miss',
        'Mlle': 'Miss',
        'Mme': 'Mrs',
        'Mr': 'Mr',
        'Mrs': 'Mrs',
        'Ms': 'Miss',
        'Rev': 'Mr',
        'Sir': 'Mr',
        'the Countess': 'Mrs'
    }
    title_age_mapping = {
        'Capt': 'elder',
        'Col': 'elder',
        'Don': 'adult',
        'Dr': 'adult',
        'Jonkheer': 'adult',
        'Lady': 'elder',
        'Major': 'elder',
        'Master': 'young',
        'Miss': 'young',
        'Mlle': 'young',
        'Mme': 'adult',
        'Mr': 'adult',
        'Mrs': 'adult',
        'Ms': 'adult',
        'Rev': 'adult',
        'Sir': 'elder',
        'the Countess': 'adult'
    }
    cabin_mapping = {
        'A': 'M',
        'B': 'G',
        'C': 'M',
        'D': 'G',
        'E': 'G',
        'F': 'G',
        'G': 'M',
        'T': 'X',
        'X': 'X'
    }

    _df['Sex_'] = _df['Sex'].apply(lambda x: 1 if x=='female' else 0)

    _df['Title_'] = _df['Name'].apply(lambda x: x.replace('.',',').split(',')[1].strip())
    _df['FamilyName'] = _df['Name'].apply(lambda x: x.replace('.',',').split(',')[0].strip())

    #_df['Fare_'] = _df['Fare'].fillna(20)
    #_df['Fare_'] = _df['Fare_'].apply(lambda x: 40 if x > 40 else x)

    ####
    _df['Fare_'] = _df['Fare']
    _df.loc[(_df.Fare.isnull()) & (_df.Pclass == 1), 'Fare_'] = np.median(_df[_df['Pclass'] == 1]['Fare'].dropna())
    _df.loc[(_df.Fare.isnull()) & (_df.Pclass == 2), 'Fare_'] = np.median(_df[_df['Pclass'] == 2]['Fare'].dropna())
    _df.loc[(_df.Fare.isnull()) & (_df.Pclass == 3), 'Fare_'] = np.median(_df[_df['Pclass'] == 3]['Fare'].dropna())
    ####
    _df['Fare_'] = _df['Fare_'] / (1+_df['SibSp']+_df['Parch'])
    _df['HasFare'] = _df['Fare'].apply(lambda x: 0 if np.isnan(x) else 1)

    _df['Fare_b'] = np.digitize(_df['Fare_'], [0,5,10,20,30,40])

    # Family Size
    _df['FamilySize'] = (_df['SibSp'] + _df['Parch'])
    _df['HasFamily'] = (_df['SibSp'] + _df['Parch']).map(lambda x: 0 if x == 0 else 1)

    # Age
    _df['HasAge'] = _df['Age'].apply(lambda x: 0 if np.isnan(x) else 1)
    _df['Age_s'] = _df['Age'].apply(age_to_s)

    # or
    #_df['Age_'] = _df["Age"].fillna(_df["Age"].mean())
    # http://stackoverflow.com/questions/21050426/pandas-impute-nans

    # Title
    _df['Title_'] = _df['Name'].apply(lambda x: x.replace('.',',').split(',')[1].strip())
    _df.loc[(_df['Title_'].isnull()) & (_df['Sex']=='female'),('Title_')] = 'Miss'
    _df.loc[(_df['Title_'].isnull()) & (_df['Sex']=='male' ), ('Title_')] = 'Master'

    _df['Title_s'] = _df['Title_'].map(title_mapping)

    _df['Title_Age_s'] = _df['Title_'].map(title_age_mapping)
    _df['Title_Age_s'] = _df['Title_Age_s'].fillna('adult')

    ## fill age NAN:
    _df.loc[_df['HasAge']==0, ('Age_s')]= _df[_df['HasAge']==0]['Title_Age_s']

    # Cabin:
    _df['Cabin_'] = _df['Cabin'].apply(lambda x: 'X' if isinstance(x, float) else x[0])
    _df['Cabin_s'] = _df['Cabin_'].map(cabin_mapping)
    # NaN is no problem for get_dummies
    # However let's try to keep it as a feature called X

    # Embarked:
    _df['Embarked_'] = _df['Embarked'].apply(lambda x: 'S' if isinstance(x, float) else x)


    df_return = _df.loc[:,('Age','Age_s','HasAge', 'Sex','Pclass','Fare_', 'Fare_b','Title_s',
                     'Title_Age_s','Embarked_','Cabin_s', 'HasFamily', 'SibSp','Parch','FamilySize','FamilyName')]

    return df_return
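# Brief illustration of the Fare_b binning above, using made-up fares: with the
# edges [0, 5, 10, 20, 30, 40], np.digitize returns 1-based bin indices and any
# fare above 40 falls into bin 6.
import numpy as np

fares = np.array([3.5, 7.25, 26.0, 512.3])
print(np.digitize(fares, [0, 5, 10, 20, 30, 40]))   # -> [1 2 4 6]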
Beispiel #44
0
def get_contextual_similarity(candidate_dataset_id, kb_entry, mention_contexts,
                              scispacy_parser, glove):
    """Computes contextual similarity scores between the candidate dataset description and
       the mention contexts using glove embeddings and cosine similarity.

       @param candidate_dataset_id: the id of the candidate dataset
       @param kb_entry: the knowledge base entry for the candidate dataset
       @param mention_contexts: a list of mention contexts to compute similarity over
       @param scispacy_parser: a scispacy parser
       @param glove: a dictionary of glove word embeddings
    """
    glove_dim = 50
    bins = np.linspace(0, 1, 11)
    num_bins = bins.shape[0]

    description = kb_entry["description"]
    if description == "":
        return [0] * num_bins, [0] * num_bins

    description = scispacy_parser.scispacy_create_doc(description)

    # try both max pooling and average pooling of word embeddings to get sentence representation
    embedded_description_max = []
    embedded_description_avg = []
    for sentence in description.sents:
        tokens = [t.text.lower() for t in sentence]
        glove_tokens = [t for t in tokens if t in glove]
        embedded_sentence = [
            np.linalg.norm(glove[t], ord=2) for t in glove_tokens
            if t not in NLTK_STOPWORDS
        ]
        # embedded_sentence = [embedding*idf_dict[t] if t in idf_dict else embedding*idf_dict["<MAX_VALUE>"] for embedding, t in zip(embedded_sentence, glove_token)]
        last_embedding_layer = embedded_sentence
        if last_embedding_layer == []:
            continue
        embedded_description_max.append(np.max(last_embedding_layer, axis=0))
        embedded_description_avg.append(np.mean(last_embedding_layer, axis=0))

    # try both max pooling and average pooling of word embeddings to get sentence representation
    embedded_contexts_max = []
    embedded_contexts_avg = []
    for context in mention_contexts:
        embedded_context_max = []
        embedded_context_avg = []
        for sentence in context[0]:
            tokens = [t.text.lower() for t in sentence]
            glove_tokens = [t for t in tokens if t in glove]
            embedded_sentence = [
                np.linalg.norm(glove[t], ord=2) for t in glove_tokens
                if t not in NLTK_STOPWORDS
            ]
            # embedded_sentence = [embedding*idf_dict[t] if t in idf_dict else embedding*idf_dict["<MAX_VALUE>"] for embedding, t in zip(embedded_sentence, glove_token)]
            last_embedding_layer = embedded_sentence
            if last_embedding_layer == []:
                continue
            embedded_context_max.append(np.max(last_embedding_layer, axis=0))
            embedded_context_avg.append(np.mean(last_embedding_layer, axis=0))
        embedded_contexts_max.append(embedded_context_max)
        embedded_contexts_avg.append(embedded_context_avg)

    cosine_distances_max = []
    cosine_distances_avg = []
    for context_max, context_avg in zip(embedded_contexts_max,
                                        embedded_contexts_avg):
        for sentence_max, sentence_avg in zip(context_max, context_avg):
            for description_max, description_avg in zip(
                    embedded_description_max, embedded_description_avg):
                max_cosine = scipy.spatial.distance.cosine(
                    sentence_max, description_max)
                avg_cosine = scipy.spatial.distance.cosine(
                    sentence_avg, description_avg)
                if not math.isnan(max_cosine):
                    cosine_distances_max.append(max_cosine)

                if not math.isnan(avg_cosine):
                    cosine_distances_avg.append(avg_cosine)

    # bin the similarity scores of description sentence and context sentence pairs
    digitized_max = np.digitize(cosine_distances_max, bins)
    digitized_avg = np.digitize(cosine_distances_avg, bins)

    binned_max = [0] * num_bins
    binned_avg = [0] * num_bins
    # use a one hot representation with a one for the largest similarity bin that has a pair in it
    binned_max[max(digitized_max) - 1] = 1
    binned_avg[max(digitized_avg) - 1] = 1

    return binned_max, binned_avg
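# Minimal sketch of the final binning step above, with made-up cosine
# distances: scores are digitised against the eleven edges of
# np.linspace(0, 1, 11) and a one-hot vector marks the highest occupied bin.
import numpy as np

bins = np.linspace(0, 1, 11)
cosine_distances = [0.12, 0.47, 0.83]
digitized = np.digitize(cosine_distances, bins)   # -> [2, 5, 9]
one_hot = [0] * bins.shape[0]
one_hot[max(digitized) - 1] = 1                   # marks the bin holding 0.83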
Beispiel #45
0
    def read_gliderdata(self, lat, lon):
        path = os.path.join(self.gliders_directory, self.glider_name,
                            'from-glider', '%s*.[st]bd' % (self.glider_name))
        dbd = dbdreader.MultiDBD(pattern=path)
        if self.glider_name == 'sim':
            print("Warning: assuming simulator. I am making up CTD data!")
            t, P = dbd.get("m_depth")
            P /= 10
            C = np.ones_like(P) * 4
            T = np.ones_like(P) * 15
        else:
            tmp = dbd.get_sync("sci_water_cond",
                               "sci_water_temp sci_water_pressure".split())
            t_last = tmp[0][-1]
            age = t_last - tmp[0]
            t, C, T, P = tmp.compress(np.logical_and(tmp[1] > 0,
                                                     age < self.AGE * 3600),
                                      axis=1)
            try:
                _, u, v = dbd.get_sync("m_water_vx", ["m_water_vy"])
            except dbdreader.DbdError:
                try:
                    _, u, v = dbd.get_sync("m_final_water_vx",
                                           ["m_final_water_vy"])
                except dbdreader.DbdError:
                    u = np.array([0])
                    v = np.array([0])

            u, v = np.compress(np.logical_and(
                np.abs(u) < 1.5,
                np.abs(v) < 1.5), [u, v],
                               axis=1)
        rho = fast_gsw.rho(C * 10, T, P * 10, lon, lat)
        SA = fast_gsw.SA(C * 10, T, P * 10, lon, lat)
        # compute the age of each measurement, and the resulting weight.
        dt = t.max() - t
        weights = np.exp(-dt / (self.AGE * 3600))
        # make binned averages
        max_depth = P.max() * 10
        dz = 5
        zi = np.arange(dz / 2, max_depth + dz / 2, dz)
        bins = np.arange(0, max_depth + dz, dz)
        bins[0] = -10
        idx = np.digitize(P * 10, bins) - 1
        rho_avg = np.zeros_like(zi, float)
        SA_avg = np.zeros_like(zi, float)
        T_avg = np.zeros_like(zi, float)
        weights_sum = np.zeros_like(zi, float)
        for _idx, _w, _rho, _SA, _T in zip(idx, weights, rho, SA, T):
            try:
                rho_avg[_idx] += _rho * _w
                SA_avg[_idx] += _SA * _w
                T_avg[_idx] += _T * _w
                weights_sum[_idx] += _w
            except IndexError:
                continue
        # if data are sparse, it can be that there are gaps
        j = np.unique(idx)
        zj = zi[j]
        rho_avg = rho_avg[j] / weights_sum[j]
        SA_avg = SA_avg[j] / weights_sum[j]
        T_avg = T_avg[j] / weights_sum[j]
        self.rho_fun = interp1d(zj,
                                rho_avg,
                                bounds_error=False,
                                fill_value=(rho_avg[0], rho_avg[-1]))
        self.SA_fun = interp1d(zj,
                               SA_avg,
                               bounds_error=False,
                               fill_value=(SA_avg[0], SA_avg[-1]))
        self.T_fun = interp1d(zj,
                              T_avg,
                              bounds_error=False,
                              fill_value=(T_avg[0], T_avg[-1]))

        if self.u_fun is None:  # not initialised yet, use last water current estimate available.
            self.u_fun = lambda x: u[-1]
            self.v_fun = lambda x: v[-1]
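# Stand-alone sketch (synthetic numbers) of the weighted bin-averaging pattern
# used above: samples are assigned to depth bins with np.digitize and each bin
# average is the weight-normalised sum of its members.
import numpy as np

depth = np.array([1.0, 2.5, 6.0, 7.5, 12.0])
value = np.array([10., 12., 20., 22., 30.])
weight = np.array([1.0, 0.5, 1.0, 1.0, 2.0])
edges = np.arange(0, 20, 5)                  # bins [0, 5), [5, 10), ...
idx = np.digitize(depth, edges) - 1
sums = np.zeros(len(edges))
wsum = np.zeros(len(edges))
np.add.at(sums, idx, value * weight)
np.add.at(wsum, idx, weight)
occupied = wsum > 0
bin_avg = sums[occupied] / wsum[occupied]    # -> [10.67, 21.0, 30.0] (approx.)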
Beispiel #46
0
def gen_motormap ( pid, savedir ):
    '''
    Generate a controlled step motor map via GP to use to seed the
    convergence runs.

    Parameters
    ==========
    pid : int
     PID of the cobra for which to generate motor maps
    savedir : str
     The directory path in which to save output.
    '''
    if not type(pid) is int:
        pid, savedir = pid
    mmap = {}
    for key in dirdict.keys ():
        fname = dirname + dirdict[key] + '/Log/PhiSpecMove_mId_1_pId_%i.txt' % pid
        if not os.path.exists ( fname ):
            print('Not found: %s' % fname)
            continue
        movesize, nmoves, niter = paramdict[key]
        
        ctrlstep = movement.read_ctrlstep ( fname, movesize, movesize=0,
                                            verbose=True, motor_id=middict[key])
        ctrlstep = ctrlstep.convert_objects ()

        #// filter ctrlstep based on Johannes' suggestions
        lowthresh = 0.01
        ctrlstep.loc[ctrlstep['stepsize'] < 0, 'stepsize'] = np.nan
        ctrlstep.loc[ctrlstep['stepsize'] > 1., 'stepsize'] = np.nan

        bins = np.arange ( 0., 400., 10. )        
        assns = np.digitize ( ctrlstep['startangle'], bins )
        grps = ctrlstep.groupby ( assns )
        ssmean = grps.mean()['stepsize']
        sscount = grps.count()['stepsize']
        
        #// cut on mean change or overpopulation
        deltam = abs(ssmean-ssmean.mean()) > 3.*ssmean.std()
        deltact = abs(sscount-sscount.mean()) > 3.*sscount.std()
        to_cut = ssmean.index[deltam|deltact]

        ctrlstep.loc[np.in1d(assns, to_cut),'stepsize'] = np.nan

        slow_mask = ctrlstep['stepsize'] < lowthresh
        ctrlstep.loc[slow_mask, 'stepsize' ] += .03
        #mmap[key] = ctrlstep
        #continue

        mm = np.isfinite(ctrlstep).all(axis=1)
        try:
            gpmod, axarr = analyze.viz_gproc ( [ctrlstep.loc[mm]], angle_grid=angbins )
        except ValueError:
            return
        gpmod = gpmod[0]

        #// set no-data to mmap=0.1
        if 'stage2' in key:
            max_angle = 180.
        else:
            max_angle = 365.
        
        gap_thresh = 20.
        gaps = ctrlstep['startangle'].sort_values().diff().dropna() > gap_thresh
        gap_ends = gaps.loc[gaps].index
        gap_stts = set ()
        for eval in gap_ends:
            gap_stts.add(gaps.index[gaps.index.get_loc(eval) - 1])
        gap_ends = ctrlstep.loc[gap_ends,'startangle'].values.tolist()
        gap_stts = ctrlstep['startangle'].loc[gap_stts].values.tolist()
        
        if ctrlstep.loc[gaps.index[0],'startangle'] > gap_thresh:
            gap_stts.append(0)
            gap_ends.append(ctrlstep.loc[gaps.index[0],'startangle'])
        elif ctrlstep.loc[gaps.index[-1], 'startangle'] < (max_angle - gap_thresh):
            gap_stts.append(ctrlstep.loc[gaps.index[-1],'startangle'])
            gap_ends.append(max_angle)

        for start, end in zip ( gap_stts, gap_ends ):
            out_of_bounds = (gpmod.angle_grid>start)&(gpmod.angle_grid<end)
            gpmod.shape_mu[out_of_bounds] = 0.1/gpmod.mmean
            
        gpmod.shape_mu[(gpmod.shape_mu*gpmod.mmean)<.02] = .02/gpmod.mmean

        axarr[0].plot ( gpmod.angle_grid, gpmod.shape_mu*gpmod.mmean,
                        '--', color='dodgerblue')
        axarr[1].plot ( gpmod.angle_grid, gpmod.shape_mu, '--', color='dodgerblue')
        axarr[0].set_ylim(0., 0.25)

        mmap[key] = gpmod
        np.savetxt ( savedir + '/pid%i_%s.dat' % (pid, key ),
                     gpmod.shape_mu * gpmod.mmean)
        plt.savefig(savedir + '/figures/pid%i_%s.png' % (pid, key) )
        plt.close('all')        
    return mmap
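# Compact sketch (synthetic data) of the digitize + groupby pattern above:
# start angles are assigned to 10-degree bins and per-bin statistics of the
# step size are taken with pandas.
import numpy as np
import pandas as pd

df = pd.DataFrame({'startangle': [3., 12., 14., 25., 26., 27.],
                   'stepsize':   [.05, .06, .07, .05, .04, .06]})
bins = np.arange(0., 400., 10.)
assns = np.digitize(df['startangle'], bins)
grps = df.groupby(assns)
print(grps.mean()['stepsize'])    # mean step size per 10-degree bin
print(grps.count()['stepsize'])   # occupancy of each bin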
Beispiel #47
0
    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        col_biner = lambda col: np.where(
            new_df[col].isnull(), nan,
            np.digitize(new_df[col], bins[col], right=right))
        bined_columns = {col: col_biner(col) for col in columns_to_bin}
        return new_df.assign(**bined_columns)
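# Hedged sketch of what the closure above does for a single column; `bins`,
# `columns_to_bin` and `right` are free variables of the enclosing function,
# so concrete values are assumed here.
import numpy as np
import pandas as pd
from numpy import nan

new_df = pd.DataFrame({'age': [4.0, np.nan, 37.0, 62.0]})
bins = {'age': [0, 18, 40, 65]}
right = False
binned = np.where(new_df['age'].isnull(), nan,
                  np.digitize(new_df['age'], bins['age'], right=right))
# -> [1., nan, 2., 3.]  (NaN rows stay NaN instead of landing in a bin)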
Beispiel #48
0
    def histogram(self, outputs, options, units, setting):
        """ 
        Produces histograms for the given outputs, tailored to the classes of
        data selected. First determines the bins, then populates these, then
        plots the result using Matplotlib.
        'options' determines the types of data available.
        'setting' determines which data types are required.
        'units' are the units of measurement strings corresponding to the
        respective data classes. These are inserted into the histogram legends.
        
        """

        kCs, kNs, keCs, krs = 'actual Cs', 'actual Ns', 'est. Cs', 'recognition'
        xmode, ymode, data_cfg = self.unwrap_cfg(options, setting)
        xmode_unit, ymode_unit = units[xmode], units[ymode]
        self.logger.info('creating %s against %s histogram' % (ymode, xmode))
        match = (data_cfg == 'matched')
        nr_bins = 100

        plt_data = {xmode: np.zeros((nr_bins, )), ymode: np.zeros((nr_bins, ))}

        if data_cfg == 'recognition':
            get = kCs, keCs, kNs, krs
        if data_cfg == 'matched':
            get = kCs, keCs, kNs
        data = self.get_data(outputs, get=get, match=match)

        Y = np.zeros((data[kCs].shape[0], ))
        if ymode == 'mean error':
            for i, (C, eC) in enumerate(zip(data[kCs], data[keCs])):
                dif = C - eC
                Y[i] = np.linalg.norm(dif)
        elif ymode == 'recognition rate':
            Y = data[krs].flatten()

        X = np.zeros(data[kCs].shape[0])

        if xmode == 'angle':
            for i, (C, N) in enumerate(zip(data[kCs], data[kNs])):
                X[i] = self.get_angle(-C, N)

        elif xmode == 'distance to cam':
            for i, C in enumerate(data[kCs]):
                X[i] = np.linalg.norm(C)

        Xmin, Xmax = np.min(X), np.max(X)
        bin_bounds, step = np.linspace(Xmin, Xmax, nr_bins + 1, retstep=True)
        plt_data[xmode] = bin_bounds[:-1]  # bin boundary values
        X_bin_indices = np.digitize(X, plt_data[xmode])
        bincount = np.zeros((nr_bins, ))

        bins = [[] for x in xrange(nr_bins)]
        for i, y in enumerate(Y):
            plt_data[ymode][X_bin_indices[i] - 1] += y
            bincount[X_bin_indices[i] - 1] += 1
            bins[X_bin_indices[i] - 1].append(y)
        std = np.zeros((nr_bins, ))
        for i in xrange(nr_bins):
            if bins[i]:
                std[i] = np.std(bins[i])

        for i, total in enumerate(plt_data[ymode]):
            if (1. * total * bincount[i]) == 0:
                continue
            plt_data[ymode][i] = total / bincount[i]

        fig = plt.figure()
        ax = plt.subplot(111)
        rwidth = step
        if ymode == 'recognition rate':
            rects = ax.bar(plt_data[xmode], plt_data[ymode], rwidth, color='b')
        else:
            rects = ax.bar(plt_data[xmode],
                           plt_data[ymode],
                           rwidth,
                           color='b',
                           yerr=std,
                           ecolor='r')

        ax.set_title('%s against %s' % (ymode, xmode))
        ax.set_xlabel(xmode + xmode_unit)
        ax.set_ylabel(ymode + ymode_unit)
        ax.grid(True)
        if data_cfg == 'arecognition':
            for rect in rects:
                height = rect.get_height()
                ax.text(rect.get_x() + rect.get_width() / 2.,
                        1.05 * height,
                        '%d' % int(height),
                        ha='center',
                        va='bottom')

        plt.show()
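# Stand-alone sketch (synthetic data) of the per-bin averaging above: X values
# are digitised against the left bin edges, and the Y values falling in each
# bin are averaged, keeping the standard deviation for error bars.
import numpy as np

rng = np.random.default_rng(0)
X = rng.uniform(0, 10, 200)
Y = X + rng.normal(0, 1, 200)
nr_bins = 10
edges, step = np.linspace(X.min(), X.max(), nr_bins + 1, retstep=True)
idx = np.digitize(X, edges[:-1]) - 1          # 0-based bin index
means = np.array([Y[idx == i].mean() if np.any(idx == i) else 0.
                  for i in range(nr_bins)])
stds = np.array([Y[idx == i].std() if np.any(idx == i) else 0.
                 for i in range(nr_bins)])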
Beispiel #49
0
    def find_events_fast(self):
        if self._triggers is None:
            return None
        # self.filterBadTriggers()
        if self._triggers.size < 5:
            return None
        self.filterBadTriggers()
        if self._toa is None:
            return None
        if self._toa.size == 0:
            # Clear out the triggers since they have nothing
            return None

        # Get our start/end triggers to get events
        start = self._triggers[0:-1:]
        if start.size == 0:
            return None

        min_window, max_window = self._eventWindow

        trigger_counter = np.arange(self._trigger_counter,
                                    self._trigger_counter + start.size,
                                    dtype=int)

        self._trigger_counter = trigger_counter[-1] + 1

        # end = self._triggers[1:-1:]
        # Get the first and last triggers in pile
        first_trigger = start[0]
        last_trigger = start[-1]
        # print('First Trigger las trigger',first_trigger,last_trigger)
        # print('TOA before',self._toa)
        # Delete useless pixels behind the first trigger
        self.updateBuffers(self._toa >= first_trigger)
        # grab only pixels we care about
        x, y, toa, tot = self.getBuffers(self._toa < last_trigger)
        # print('triggers',start)
        # print('TOA',toa)
        self.updateBuffers(self._toa >= last_trigger)
        try:
            event_mapping = np.digitize(toa, start) - 1
        except Exception as e:
            self.error('Exception has occurred due to {}'.format(str(e)))
            self.error('Writing output TOA {}'.format(toa))
            self.error('Writing triggers {}'.format(start))
            self.error('Flushing triggers!!!')
            self._triggers = self._triggers[-1:]
            return None
        event_triggers = self._triggers[:-1:]
        self._triggers = self._triggers[-1:]

        # print('Trigger delta',triggers,np.ediff1d(triggers))

        tof = toa - event_triggers[event_mapping]
        event_number = trigger_counter[event_mapping]

        exp_filter = (tof >= min_window) & (tof <= max_window)

        result = event_number[exp_filter], x[exp_filter], y[exp_filter], tof[
            exp_filter], tot[exp_filter]

        if result[0].size > 0:
            return result
        else:
            return None
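# Minimal sketch (made-up timestamps) of the trigger-mapping trick above:
# np.digitize against the sorted trigger times yields, for every arrival time,
# the index of the most recent trigger, so the time of flight is one
# vectorised subtraction.
import numpy as np

triggers = np.array([100., 200., 300.])
toa = np.array([105., 140., 205., 210., 299.])
event_mapping = np.digitize(toa, triggers) - 1    # -> [0, 0, 1, 1, 1]
tof = toa - triggers[event_mapping]               # -> [5., 40., 5., 10., 99.]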
Beispiel #50
0
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from skimage import data
from skimage.filters import threshold_multiotsu

# Setting the font size for all plots.
matplotlib.rcParams['font.size'] = 9

# The input image.
image = data.camera()

# Applying multi-Otsu threshold for the default value, generating
# three classes.
thresholds = threshold_multiotsu(image)

# Using the threshold values, we generate the three regions.
regions = np.digitize(image, bins=thresholds)

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10, 3.5))

# Plotting the original image.
ax[0].imshow(image, cmap='gray')
ax[0].set_title('Original')
ax[0].axis('off')

# Plotting the histogram and the two thresholds obtained from
# multi-Otsu.
ax[1].hist(image.ravel(), bins=255)
ax[1].set_title('Histogram')
for thresh in thresholds:
    ax[1].axvline(thresh, color='r')
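# The snippet is cut off here; the scikit-image gallery example continues
# roughly as below, showing the digitised regions on the remaining axis
# (hedged reconstruction, not part of the original excerpt).
ax[2].imshow(regions, cmap='jet')
ax[2].set_title('Multi-Otsu result')
ax[2].axis('off')

plt.show()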
Beispiel #51
0
        'output/enrichment/GTEx_maf_tss_binned/bins.{suffix}'
    run:
        maf_bins = np.linspace(0, 1, 51)
        tss_bins = np.linspace(-500000, 500000, 51)

        bins = {}
        for m in range(len(maf_bins)):
            bins[m] = {}
            for t in range(len(tss_bins)):
                bins[m][t] = defaultdict(list)

        with open(input[0], 'r') as f:
            for line in f:
                chromosome, variant, gene, maf, dtss = line.split('\t')
                if float(maf) > 0.01:
                    maf_bin = np.digitize(float(maf), maf_bins)
                    tss_bin = np.digitize(float(dtss), tss_bins)
                    bins[maf_bin][tss_bin][variant].append(gene)
        json.dump(bins, open(output[0], 'w'))

rule make_gene_variant_lookup:
    input:
        'output/enrichment/GTEx_maf_tss/GTEx_maf_tss.{suffix}'
    output:
        expand('output/enrichment/GTEx_maf_tss_lookup/chr{chr}/chr{chr}.lookup.{suffix}',
            chr=list(range(1, 23)), suffix='{suffix}')
    run:
        lookup = {'chr{}'.format(x): defaultdict(dict) for x in range(1, 23)}
        with open(input[0], 'r') as f:
            for line in f:
                chromosome, variant, gene, maf, dtss = line.strip().split('\t')
Beispiel #52
0
def histplot_raw(datas,
                 bins,
                 labels,
                 weights=None,
                 removenorm=False,
                 scale=1.,
                 doerrorbar=True,
                 **kwargs):
    settings = {
        "xlabel": r"$m_{Vh}[GeV]$",
        "ylabel": 'Number of Events',
        "title1":
        r"ATLAS",  # \newline Ptl next-leading, full cuts, 2 b-tags $",
        "title1_1": r"Internal",
        "title2":
        r"$\mathit{\sqrt{s}=13\:TeV,139\:fb^{-1}}$",  # Ptl next-leading, full cuts, 2 b-tags $",
        #"title3": r"$\mathbf{2\;lep.,2\;b-tag}$",
        "title3": "2 lep., 2 b-tag",
        "filename": "deltatest2",
        "log_y": False,
        "norm": False,
        "upper_y": 1.5,
    }
    settings.update(kwargs)

    if weights is None:
        weights = []
        for each in datas:
            weights.append(np.ones(len(each)))

    if removenorm:
        for i in range(len(weights)):
            weights[i] = np.array(weights[i]) / np.sum(weights[i])
    sigmas = []
    weight_in_binses = []
    for i in range(len(datas)):
        event_location = np.digitize(datas[i] / scale, bins)
        sigma2 = []
        weight_in_bins = []
        for j in range(np.size(bins) - 1):
            bin_weight = weights[i][np.where(event_location == j + 1)[0]]
            sigma2.append(np.sum(bin_weight**2.))
            weight_in_bins.append(np.sum(bin_weight))
        sigmas.append(np.array(sigma2)**0.5)
        weight_in_binses.append(np.array(weight_in_bins))

    colors = ['b', 'g', 'r', 'c', 'm', 'y']
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.hist((np.array(datas) / scale).tolist(),
            bins,
            histtype='step',
            fill=False,
            color=colors[0:len(datas)],
            weights=np.array(weights).tolist())
    bins = np.array(bins)
    for i in range(len(datas)):
        bin_centre = (bins[0:-1] + bins[1:]) / 2
        if doerrorbar:
            ax.errorbar(bin_centre,
                        weight_in_binses[i],
                        xerr=0.0001,
                        yerr=sigmas[i],
                        fmt='.',
                        color=colors[i],
                        label=str(labels[i]))
        else:
            ax.errorbar(bin_centre,
                        weight_in_binses[i],
                        xerr=0.0001,
                        yerr=0,
                        fmt='_',
                        color=colors[i],
                        label=str(labels[i]))
    ax.legend(loc='upper right', prop={'size': 20}, frameon=False)

    ymin, ymax = ax.get_ylim()
    ax.set_ylim([0, ymax * settings["upper_y"]])
    ax.text(0.05,
            1.55 / 1.7,
            settings['title1'],
            fontsize=25,
            transform=ax.transAxes,
            style='italic',
            fontweight='bold')
    ax.text(0.227,
            1.55 / 1.7,
            settings['title1_1'],
            fontsize=25,
            transform=ax.transAxes)
    ax.text(0.05,
            1.40 / 1.7,
            settings['title2'],
            fontsize=23,
            transform=ax.transAxes,
            style='italic',
            fontweight='bold')
    ax.text(0.05,
            1.26 / 1.7,
            settings['title3'],
            fontsize=18,
            weight='bold',
            style='italic',
            transform=ax.transAxes)
    ax.set_ylabel(settings['ylabel'], fontsize=20)
    ax.set_xlabel(settings['xlabel'], fontsize=20)
    if settings['log_y']:
        ax.set_yscale('log')
        ax.set_ylim([0.1, 10**(math.log10(ymax) * settings["upper_y"])])
        ax.yaxis.set_major_locator(
            matplotlib.ticker.LogLocator(base=10, numticks=100))
        ax.minorticks_on()

    fig.savefig(settings['filename'] + '.pdf',
                bbox_inches='tight',
                pad_inches=0.25)
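# Small sketch (synthetic events) of the per-bin bookkeeping above: the bin
# content is the sum of the event weights and the statistical error is the
# square root of the sum of squared weights.
import numpy as np

data = np.array([1.2, 1.7, 2.3, 2.9, 3.4])
weights = np.array([0.5, 1.0, 2.0, 1.0, 0.5])
bins = np.array([1., 2., 3., 4.])
loc = np.digitize(data, bins)
content = np.array([weights[loc == j + 1].sum() for j in range(bins.size - 1)])
sigma = np.array([np.sqrt((weights[loc == j + 1] ** 2).sum())
                  for j in range(bins.size - 1)])
# content -> [1.5, 3.0, 0.5]; sigma -> [1.118, 2.236, 0.5]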
Beispiel #53
0
def consolidate_sweep_results(file_path, plot_hist=True):
    results = []
    #for i in xrange(num_exps):
    #    results.append(py_scripts_yann.load_pickle(file_path+'_'+str(i)+'.pkl'))
    import json
    json_data = open(file_path + '/config.json').read()
    sim_pars = json.loads(json_data)['sim_pars']

    sweep_results = {
        'pc_rval_mean':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'pc_rval_std':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_connprob_mean':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_connprob_std':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_connprob_mean':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_connprob_std':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_pc_connprob':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_pc_connprob':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_pc_std':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_pc_std':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_pc_mean':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_pc_mean':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_selectivity_mean':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_selectivity_mean':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_selectivity_std':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_selectivity_std':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_selectivity_upper':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_selectivity_upper':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_selectivity_max':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_selectivity_max':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'uniform_pc_input':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
        'diverse_pc_input':
        np.zeros((len(sim_pars['par_sweep_vals']),
                  len(sim_pars['par_sweep_vals_2']))),
    }

    for par_val_idx in xrange(len(sim_pars['par_sweep_vals'])):
        par_val = sim_pars['par_sweep_vals'][par_val_idx]
        if not sim_pars['par_sweep_key_2'] == None:
            iter_pars = sim_pars.copy()
            iter_pars[sim_pars['par_sweep_key']] = par_val
            for par_val_2_idx in xrange(len(sim_pars['par_sweep_vals_2'])):
                par_val_2 = sim_pars['par_sweep_vals_2'][par_val_2_idx]
                iter_pars[sim_pars['par_sweep_key_2']] = par_val_2
                str_i = sim_pars['par_sweep_key'] + str(
                    iter_pars[sim_pars['par_sweep_key']]
                ) + sim_pars['par_sweep_key_2'] + str(
                    iter_pars[sim_pars['par_sweep_key_2']])
                res_file = os.path.join(
                    file_path,
                    str(sim_pars['sim_title']) + '_' + str(str_i) + '.pkl')
                try:
                    temp_results = py_scripts_yann.load_pickle(res_file)
                    sweep_results['pc_rval_mean'][
                        par_val_idx, par_val_2_idx] = np.mean([
                            temp_results['selectivities_results_corr'][i]
                            ['pop_coupling_partial_rval']
                            for i in xrange(sim_pars['N_sims'])
                        ])
                    sweep_results['pc_rval_std'][
                        par_val_idx, par_val_2_idx] = np.std([
                            temp_results['selectivities_results_corr'][i]
                            ['pop_coupling_partial_rval']
                            for i in xrange(sim_pars['N_sims'])
                        ])

                    unif_connprob_temp = []
                    diverse_connprob_temp = []
                    #unif_connprob_std_temp = []
                    #diverse_connprob_std_temp = []
                    unif_corr_pc_connprob = []
                    diverse_corr_pc_connprob = []
                    unif_pc_temp = []
                    diverse_pc_temp = []
                    unif_input_temp = []
                    diverse_input_temp = []
                    uniform_selectivity_temp = []
                    diverse_selectivity_temp = []
                    unif_corr_pc_input = []
                    diverse_corr_pc_input = []
                    for i in xrange(sim_pars['N_sims']):
                        print 'sim idx ', i
                        unif_connprob_temp.append(
                            np.mean(temp_results['simresults_uniform'][i]
                                    ['W_conn'],
                                    axis=0))
                        diverse_connprob_temp.append(
                            np.mean(
                                temp_results['simresults_corr'][i]['W_conn'],
                                axis=0))
                        unif_input_temp.append(
                            np.sum(temp_results['simresults_uniform'][i]
                                   ['W_plastic'],
                                   axis=0))
                        diverse_input_temp.append(
                            np.sum(temp_results['simresults_corr'][i]
                                   ['W_plastic'],
                                   axis=0))
                        #unif_connprob_std_temp.append(np.std(temp_results['simresults_uniform'][i]['W_conn'],axis=0))
                        #diverse_connprob_std_temp.append(np.std(temp_results['simresults_corr'][i]['W_conn'],axis=0))
                        unif_pc_temp.append(
                            temp_results['selectivities_results_uniform'][i]
                            ['empirical_pop_coupling'])
                        diverse_pc_temp.append(
                            temp_results['selectivities_results_corr'][i]
                            ['empirical_pop_coupling'])
                        unif_corr_pc_connprob.append(
                            stats.pearsonr(
                                temp_results['selectivities_results_uniform']
                                [i]['empirical_pop_coupling'],
                                unif_connprob_temp[-1]))
                        diverse_corr_pc_connprob.append(
                            stats.pearsonr(
                                temp_results['selectivities_results_corr'][i]
                                ['empirical_pop_coupling'],
                                diverse_connprob_temp[-1]))
                        unif_corr_pc_input.append(
                            stats.pearsonr(
                                temp_results['selectivities_results_uniform']
                                [i]['empirical_pop_coupling'],
                                unif_input_temp[-1]))
                        diverse_corr_pc_input.append(
                            stats.pearsonr(
                                temp_results['selectivities_results_corr'][i]
                                ['empirical_pop_coupling'],
                                diverse_input_temp[-1]))
                        uniform_selectivity_temp.append(
                            temp_results['selectivities_t_uniform'][-1])
                        diverse_selectivity_temp.append(
                            temp_results['selectivities_t_corr'][-1])
                    sweep_results['uniform_connprob_mean'][
                        par_val_idx,
                        par_val_2_idx] = np.mean(np.array(unif_connprob_temp))
                    sweep_results['uniform_connprob_std'][
                        par_val_idx,
                        par_val_2_idx] = np.std(np.array(unif_connprob_temp))
                    sweep_results['diverse_connprob_mean'][
                        par_val_idx, par_val_2_idx] = np.mean(
                            np.array(diverse_connprob_temp))
                    sweep_results['diverse_connprob_std'][
                        par_val_idx, par_val_2_idx] = np.std(
                            np.array(diverse_connprob_temp))
                    sweep_results['diverse_pc_connprob'][
                        par_val_idx, par_val_2_idx] = np.mean(
                            np.array(diverse_corr_pc_connprob))
                    sweep_results['uniform_pc_connprob'][
                        par_val_idx, par_val_2_idx] = np.mean(
                            np.array(unif_corr_pc_connprob))
                    sweep_results['diverse_pc_input'][
                        par_val_idx, par_val_2_idx] = np.mean(
                            np.array(diverse_corr_pc_input))
                    sweep_results['uniform_pc_input'][
                        par_val_idx,
                        par_val_2_idx] = np.mean(np.array(unif_corr_pc_input))
                    sweep_results['diverse_pc_mean'][
                        par_val_idx,
                        par_val_2_idx] = np.mean(np.array(diverse_pc_temp))
                    sweep_results['uniform_pc_mean'][
                        par_val_idx,
                        par_val_2_idx] = np.mean(np.array(unif_pc_temp))
                    sweep_results['diverse_pc_std'][
                        par_val_idx,
                        par_val_2_idx] = np.std(np.array(diverse_pc_temp))
                    sweep_results['uniform_pc_std'][par_val_idx,
                                                    par_val_2_idx] = np.std(
                                                        np.array(unif_pc_temp))
                    sweep_results['diverse_selectivity_mean'][
                        par_val_idx, par_val_2_idx] = np.mean(
                            np.array(diverse_selectivity_temp))
                    sweep_results['diverse_selectivity_std'][
                        par_val_idx, par_val_2_idx] = np.std(
                            np.array(diverse_selectivity_temp))
                    sweep_results['diverse_selectivity_upper'][
                        par_val_idx, par_val_2_idx] = np.percentile(
                            np.array(diverse_selectivity_temp), 90)
                    sweep_results['diverse_selectivity_max'][
                        par_val_idx, par_val_2_idx] = np.max(
                            np.array(diverse_selectivity_temp))
                    sweep_results['uniform_selectivity_mean'][
                        par_val_idx, par_val_2_idx] = np.mean(
                            np.array(uniform_selectivity_temp))
                    sweep_results['diverse_selectivity_std'][
                        par_val_idx, par_val_2_idx] = np.std(
                            np.array(diverse_selectivity_temp))
                    sweep_results['uniform_selectivity_upper'][
                        par_val_idx, par_val_2_idx] = np.percentile(
                            np.array(uniform_selectivity_temp), 90)
                    sweep_results['uniform_selectivity_max'][
                        par_val_idx, par_val_2_idx] = np.max(
                            np.array(uniform_selectivity_temp))
                    plt.figure()
                    plt.hist([
                        np.array(unif_connprob_temp),
                        np.array(diverse_connprob_temp)
                    ], 20)
                    plt.legend(['uniform', 'diverse'])
                    plt.savefig(
                        os.path.join(
                            file_path,
                            str(sim_pars['sim_title']) + '_' + str(str_i) +
                            '_connprob_hist.pdf'))
                    plt.cla()

                    sns.jointplot(np.array(diverse_pc_temp),
                                  np.array(diverse_connprob_temp),
                                  kind='hexbin',
                                  ylim=(0.0, 1.0))
                    plt.title('Plasticity-connectivity link, diverse')
                    plt.savefig(
                        os.path.join(
                            file_path,
                            str(sim_pars['sim_title']) + '_' + str(str_i) +
                            'pc_connprob_diverse.pdf'))
                    plt.cla()
                    sns.jointplot(np.array(unif_pc_temp),
                                  np.array(unif_connprob_temp),
                                  kind='hexbin',
                                  ylim=(0.0, 1.0))
                    plt.title('Plasticity-connectivity link, uniform')
                    plt.savefig(
                        os.path.join(
                            file_path,
                            str(sim_pars['sim_title']) + '_' + str(str_i) +
                            '_pc_connprob_uniform.pdf'))
                    plt.cla()

                    plt.figure()
                    digit = np.digitize(
                        np.array(diverse_pc_temp).flatten(),
                        np.arange(-1.0, 1.0, 0.2))
                    plt.plot([
                        np.mean(
                            np.array(diverse_connprob_temp).flatten()[
                                digit == i]) for i in xrange(10)
                    ])
                    digit = np.digitize(
                        np.array(unif_pc_temp).flatten(),
                        np.arange(-1.0, 1.0, 0.1))
                    plt.plot([
                        np.mean(
                            np.array(unif_connprob_temp).flatten()[digit == i])
                        for i in xrange(10)
                    ])
                    plt.title('PC-connectivity link')
                    plt.legend(['diverse', 'uniform'])
                    plt.savefig(
                        os.path.join(
                            file_path,
                            str(sim_pars['sim_title']) + '_' + str(str_i) +
                            '_pc_connprob_link.pdf'))
                    plt.cla()
                except:
                    pass
        else:
            iter_pars = sim_pars.copy()
            iter_pars[sim_pars['par_sweep_key']] = par_val
            str_i = sim_pars['par_sweep_key'] + str(
                iter_pars[sim_pars['par_sweep_key']])  # Passing the list
            res_file = os.path.join(
                file_path,
                str(sim_pars['sim_title']) + '_' + str(str_i) + '.pkl')
            temp_results = py_scripts_yann.load_pickle(res_file)
            # 'par_sweep_key_2' is None here, so there is only one sweep axis;
            # store the results in column 0.
            sweep_results['pc_rval_mean'][par_val_idx, 0] = np.mean([
                temp_results['selectivities_results_corr'][i]
                ['pop_coupling_partial_rval']
                for i in xrange(sim_pars['N_sims'])
            ])
            sweep_results['pc_rval_std'][par_val_idx, 0] = np.std([
                temp_results['selectivities_results_corr'][i]
                ['pop_coupling_partial_rval']
                for i in xrange(sim_pars['N_sims'])
            ])

    if plot_hist:
        plt.pcolor(sweep_results['pc_rval_mean'])
        plt.title('Plasticity-coupling link')
        plt.colorbar()
        plt.savefig(
            os.path.join(
                file_path,
                str(sim_pars['sim_title']) + '_' + str(str_i) +
                '_plasticity_coupling_link_mean.pdf'))
        plt.show()
        plt.pcolor(sweep_results['pc_rval_std'])
        plt.title('Plasticity-coupling link variability')
        plt.colorbar()
        plt.savefig(
            os.path.join(
                file_path,
                str(sim_pars['sim_title']) + '_' + str(str_i) +
                '_plasticity_coupling_link_std.pdf'))
        plt.show()
        plt.pcolor(sweep_results['uniform_connprob_std'])
        plt.title('input connectivity width, uniform')
        plt.colorbar()
        plt.savefig(
            os.path.join(
                file_path,
                str(sim_pars['sim_title']) + '_' + str(str_i) +
                '_connectivity_width_uniform.pdf'))
        plt.show()
        plt.pcolor(sweep_results['diverse_connprob_std'])
        plt.title('input connectivity width, diverse')
        plt.savefig(
            os.path.join(
                file_path,
                str(sim_pars['sim_title']) + '_' + str(str_i) +
                '_connectivity_width_diverse.pdf'))
        plt.colorbar()
        plt.show()
        plt.pcolor(sweep_results['diverse_pc_connprob'])
        plt.title('pc-connprob link, diverse')
        plt.colorbar()
        plt.savefig(
            os.path.join(
                file_path,
                str(sim_pars['sim_title']) + '_' + str(str_i) +
                '_pc_connprob_diverse.pdf'))
        plt.show()
        plt.pcolor(sweep_results['uniform_pc_connprob'])
        plt.title('pc-connprob link, uniform')
        plt.colorbar()
        plt.savefig(
            os.path.join(
                file_path,
                str(sim_pars['sim_title']) + '_' + str(str_i) +
                '_pc_connprob_uniform.pdf'))
        plt.show()
    return sweep_results
Beispiel #54
0
print(f"desired scale:{desired_scale}")
desired_size = (
    int(img_gray.shape[1] * desired_scale),
    int(img_gray.shape[0] * desired_scale),
)

img_resized = cv2.resize(img_gray, desired_size)
print(f"resized size:{img_resized.shape}")

bins = np.arange(
    start=0.0,
    stop=255,
    step=255/quantize_num
)
print(f"bins:{bins}")
img_mozaic = np.digitize(img_resized, bins)


with xlsxwriter.Workbook('result.xlsx') as workbook:
    worksheet = workbook.add_worksheet()
    # Write pixel intensity.
    for y in range(0, img_mozaic.shape[0]):
        for x in range(0, img_mozaic.shape[1]):
            e_num = img_mozaic.item(y, x)
            worksheet.write_number(
                row=y,
                col=x,
                number=e_num if not inverse else (quantize_num-e_num)
            )
    # Set width and height.
    for y in range(0, img_mozaic.shape[0]):
Beispiel #55
0
    def estimateFromForces(self, pathCart):
        def movingAverage(values, window):
            weights = np.repeat(1.0, window) / window
            sma = np.convolve(values, weights, 'valid')
            return sma

        numBins = 51
        mostForceThresh = 0.5  #only look at data of (mostForceThresh*100) percentile
        window_size = 15  #for moving average filter

        forces = self.getCartForces(pathCart)
        t = np.arange(np.shape(forces)[0])
        xForces = forces[:, 0]
        # print(xForces)
        yForces = forces[:, 2]
        zForces = forces[:, 1]

        xForcesMA = movingAverage(xForces, window_size)
        yForcesMA = movingAverage(yForces, window_size)
        zForcesMA = movingAverage(zForces, window_size)

        #estimate x position
        xzvt = np.array([pathCart[window_size - 1:, 0], xForcesMA / zForcesMA])
        # print(xzvt)
        bins = np.linspace(-0.5, 0.5, numBins)
        xzvt[0, :] = np.digitize(xzvt[0, :], bins)

        binSum = np.zeros(len(bins))
        i = 0
        while i < len(bins):
            currentBin = np.argwhere([
                (xzvt[0, :] == i),
                (xzvt[1, :] > np.quantile(xzvt[:, 1], mostForceThresh))
            ])  #get upper mostForceThresh% values from each bin
            binSum[i] = np.sum(xzvt[1, currentBin]) / (np.count_nonzero([
                xzvt[0, :] == i,
                (xzvt[1, :] > np.quantile(xzvt[:, 1], mostForceThresh))
            ]))  #total number of times the bin is used
            i += 1

        print(binSum)

        polyOrder = 2
        bestFitxzvt = np.polyfit(bins[np.logical_not(np.isnan(binSum))],
                                 binSum[np.logical_not(np.isnan(binSum))],
                                 polyOrder)
        pbins = np.poly1d(bestFitxzvt)
        xpbins = np.linspace(-0.5, 0.5, 100)

        critX = pbins.deriv().r
        r_critX = critX[critX.imag == 0].real
        testX = pbins.deriv(2)(r_critX)
        x_maxX = r_critX[testX > 0]
        y_min = pbins(x_maxX)
        print("shoulder x is = ", x_maxX)

        #estimate z position
        zxvt = np.array([pathCart[window_size - 1:, 2], zForcesMA / xForcesMA])
        # print(zvt)
        bins = np.linspace(-0.5, 0.5, numBins)
        zxvt[0, :] = np.digitize(zxvt[0, :], bins)

        binSum = np.zeros(len(bins))
        i = 0
        while i < len(bins):
            currentBin = np.argwhere([
                (zxvt[0, :] == i),
                (zxvt[1, :] > np.quantile(zxvt[:, 1], mostForceThresh))
            ])  #get upper mostForceThresh% values from each bin
            binSum[i] = np.sum(zxvt[1, currentBin]) / (np.count_nonzero([
                zxvt[0, :] == i,
                (zxvt[1, :] > np.quantile(zxvt[:, 1], mostForceThresh))
            ]))  #total number of times the bin is used
            i += 1

        print(binSum)

        polyOrder = 2
        bestFitzxvt = np.polyfit(bins[np.logical_not(np.isnan(binSum))],
                                 binSum[np.logical_not(np.isnan(binSum))],
                                 polyOrder)
        pbins = np.poly1d(bestFitzxvt)
        xpbins = np.linspace(-0.5, 0.5, 100)

        critZ = pbins.deriv().r
        r_critZ = critZ[critZ.imag == 0].real
        testZ = pbins.deriv(2)(r_critZ)
        x_maxZ = r_critZ[testZ < 0]
        y_min = pbins(x_maxZ)
        print("shoulder z is = ", x_maxZ)

        #OLD VERSION WITHOUT MOVING AVERAGE AND BINNING
        # forcesCart = self.getCartForces(pathCart)
        # xForces = forcesCart[forcesCart[:,0].argsort()]
        # polyOrder = 4 #start with 2nd order, try again and again until there is a negative coeffieienct on largest term
        # bestFitX = np.polyfit(pathCart[:,0],forcesCart[:,0],polyOrder)
        # # print(bestFitX)
        # pX = np.poly1d(bestFitX)
        # xpX= np.linspace(-1,1,100)
        # critX = pX.deriv().r
        # r_critX = critX[critX.imag==0].real
        # testX = pX.deriv(2)(r_critX)
        # x_maxX = r_critX[testX<0]
        # y_min = pX(x_maxX)
        # # print("shoulder x is = ", max(x_maxX, key=abs))

        # yForces = forcesCart[forcesCart[:,2].argsort()]
        # polyOrder = 2
        # bestFitY = np.polyfit(pathCart[:,2],forcesCart[:,2],polyOrder)
        # # print(bestFitY)
        # pY = np.poly1d(bestFitY)
        # xpY = np.linspace(-1,1,100)
        # critY = pY.deriv().r
        # r_critY = critY[critY.imag==0].real
        # testY = pY.deriv(2)(r_critY)
        # x_maxY = r_critY[testY<0]
        # y_min = pY(x_maxY)
        # # print("shoulder y is = ", max(x_maxY, key=abs))

        # zForces = forcesCart[forcesCart[:,1].argsort()]
        # polyOrder = 2
        # bestFitZ = np.polyfit(pathCart[:,1],forcesCart[:,1],polyOrder)
        # # print(bestFitZ)
        # pZ = np.poly1d(bestFitZ)
        # xpZ = np.linspace(-1,1,100)
        # critZ = pZ.deriv().r
        # r_critZ = critZ[critZ.imag==0].real
        # testZ = pZ.deriv(2)(r_critZ)
        # x_maxZ = r_critZ[testZ<0]
        # y_min = pY(x_maxZ)
        # # print("shoulder Z is = ", max(x_maxZ, key=abs))

        # assume constant shoulder height x_maxY
        x_maxY = 0.2

        bestEst = np.array([[x_maxX, x_maxY, x_maxZ]])
        print("best estimate from forces = ", bestEst)
        return (bestEst)
Beispiel #56
0
def histogram_pair(value_vec, binary_vec, bins, smoothing_const=.01,
                   prior_prob=.5, rel_risk=False, error_bar_alpha=.05,
                   figsize = (12,6), **kwargs):
    """Plot the relationship between a numerical feature and a binary outcome.

    This will create two plots stacked vertically.  The upper plot
    is a stacked histogram showing the counts of 0 and 1 in each
    respective bin.

    The lower plot shows the marginal empirical probability of being a 1
    given that the numerical feature is in a particular value range.

    This gives a simple way to assess the relationship between the
    two variables, especially if it is non-linear. Error bars are also
    shown to demonstrate the confidence of the empirical probability
    (based on the Beta distribution)

    Parameters
    ----------

    value_vec : array-like (containing numerical values)
        The array of numerical values that we are exploring

    binary_vec : array_like (containing 0/1 values)
        The array of binary values that we are exploring

    bins : list or numpy array
        The bin endpoints to use, as if constructing a histogram.

    smoothing_const : float, default = .01
        To avoid issues when a bin contains few or no data points,
        we add in a small number of both positive and negative
        observations to each bin. This controls the weight of the
        added data.

    prior_prob : float, default = .5
        The prior probability reflected by the added data.

    rel_risk : bool, default is False
        If True, this will plot log(emp_prob/prior_prob)
        on the y-axis rather than emp_prob.

    error_bar_alpha : float default=.05
        The alpha value to use for the error bars (based on
        the Beta distribution).  Default is 0.05 corresponding
        to a 95% confidence interval.

    figsize : tuple of 2 floats, default=(12,6)
        The size of the "canvas" to use for plotting.

    **kwargs : other
        Other parameters to be passed to the plt.hist command.
    """
    nan_mask = np.isnan(value_vec)
    num_nans = np.sum(nan_mask)
    if num_nans > 0:
        nan_binary_vec = binary_vec[nan_mask]
        binary_vec = binary_vec[~nan_mask]
        value_vec = value_vec[~nan_mask]
        nan_avg_value = np.mean(nan_binary_vec)
        reg_avg_value = np.mean(binary_vec)
    out0 = plt.hist(value_vec[binary_vec == 0], bins=bins, **kwargs)
    out1 = plt.hist(value_vec[binary_vec == 1], bins=bins, **kwargs)
    plt.close()
    plt.figure(figsize=figsize)
    plt.subplot(2, 1, 1)
    plt.hist((value_vec[binary_vec == 0],value_vec[binary_vec == 1]),
              stacked=True, bins=bins, **kwargs)
    bin_leftpts = (out1[1])[:-1]
    bin_rightpts = (out1[1])[1:]
    default_bin_centers = (bin_leftpts + bin_rightpts) / 2
    digitized_value_vec = np.digitize(value_vec, bins)
    bin_centers = np.array([np.mean(value_vec[digitized_value_vec==i])
                                if i in np.unique(digitized_value_vec)
                                else default_bin_centers[i-1]
                                for i in np.arange(len(bins)-1)+1])
    prob_numer = out1[0]
    prob_denom = out1[0] + out0[0]
    probs = ((prob_numer + prior_prob * smoothing_const) /
             (prob_denom + smoothing_const))
    plt.subplot(2, 1, 2)
    if rel_risk:
        plt.plot(bin_centers, np.log(probs / prior_prob), '-o')
        plt.xlim(bin_leftpts[0], bin_rightpts[-1])
    else:
        plt.plot(bin_centers[:len(probs)], probs, '-o')
        plt.xlim(bin_leftpts[0], bin_rightpts[-1])
        yerr_mat_temp = beta.interval(1-error_bar_alpha,out1[0]+1,out0[0]+1)
        yerr_mat = np.vstack((yerr_mat_temp[0],yerr_mat_temp[1])) - probs
        yerr_mat[0,:] = -yerr_mat[0,:]
        plt.errorbar(bin_centers[:len(probs)], probs,
                     yerr=yerr_mat, capsize=5)
        plt.xlim(bin_leftpts[0], bin_rightpts[-1])
        if num_nans > 0:
            plt.hlines(y=nan_avg_value, xmin=bin_leftpts[0],
                       xmax=bin_leftpts[1], linestyle='dotted')
            plt.hlines(y=reg_avg_value, xmin=bin_leftpts[0],
                       xmax=bin_leftpts[1], linestyle='dashed')
    return {'bin_centers': bin_centers, 'probs': probs,
            'prob_numer': prob_numer, 'prob_denom': prob_denom}
Beispiel #57
0
def segmented_rings(edges, segments, center, shape, offset_angle=0):
    """
    Parameters
    ----------
    edges : array
         inner and outer radius for each ring

    segments : int or list
        number of pie slices or list of angles in radians
        That is, 8 produces eight equal-sized angular segments,
        whereas a list can be used to produce segments of unequal size.

    center : tuple
        point in image where r=0; may be a float giving subpixel precision.
        Order is (rr, cc).

    shape : tuple
        Image shape which is used to determine the maximum extent of output
        pixel coordinates. Order is (rr, cc).

    offset_angle : float or array, optional
        angular offset in radians applied to the segment boundaries,
        measured from the positive X axis

    Returns
    -------
    label_array : array
        Elements not inside any ROI are zero; elements inside each
        ROI are labeled 1, 2, 3, ..., in the order they are specified
        in edges and segments.

    See Also
    --------
    ring_edges : Calculate the inner and outer radius of a set of rings.

    """
    edges = np.asarray(edges).ravel()
    if len(edges) % 2 != 0:
        raise ValueError("edges should have an even number of elements, "
                         "giving inner, outer radii for each ring")
    if not np.all(np.diff(edges) >= 0):
        raise ValueError("edges are expected to be monotonically increasing, "
                         "giving inner and outer radii of each ring from "
                         "r=0 outward")

    agrid = utils.angle_grid(center, shape)

    agrid[agrid < 0] = 2 * np.pi + agrid[agrid < 0]

    # collections.Iterable was removed in Python 3.10; this requires `import collections.abc`
    segments_is_list = isinstance(segments, collections.abc.Iterable)
    if segments_is_list:
        segments = np.asarray(segments) + offset_angle
    else:
        # N equal segments requires N+1 bin edges spanning 0 to 2pi.
        segments = np.linspace(0, 2 * np.pi, num=1 + segments, endpoint=True)
        segments += offset_angle

    # the indices of the bins(angles) to which each value in input
    #  array(angle_grid) belongs.
    ind_grid = (np.digitize(np.ravel(agrid), segments,
                            right=False)).reshape(shape)

    label_array = np.zeros(shape, dtype=np.int64)
    # radius grid for the image_shape
    rgrid = utils.radial_grid(center, shape)

    # assign indices value according to angles then rings
    len_segments = len(segments)
    for i in range(len(edges) // 2):
        indices = (edges[2 * i] <= rgrid) & (rgrid < edges[2 * i + 1])
        # Combine "segment #" and "ring #" to get unique label for each.
        label_array[indices] = ind_grid[indices] + (len_segments - 1) * i

    return label_array
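The core of segmented_rings is np.digitize applied to a grid of pixel angles. A self-contained sketch of just that step, with the angle grid built directly in numpy (a rough stand-in for utils.angle_grid, whose exact sign convention may differ):

import numpy as np

shape = (5, 5)            # tiny image, purely for illustration
center = (2.0, 2.0)       # (row, col) of r = 0

# Angle of every pixel relative to the center, mapped into [0, 2*pi)
rr, cc = np.indices(shape)
agrid = np.arctan2(rr - center[0], cc - center[1])
agrid[agrid < 0] += 2 * np.pi

# Four equal angular segments -> 5 edges spanning 0 to 2*pi
segments = np.linspace(0, 2 * np.pi, num=5, endpoint=True)

# Segment index (1..4) of each pixel, using the same ravel/reshape trick
ind_grid = np.digitize(np.ravel(agrid), segments, right=False).reshape(shape)
print(ind_grid)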
                    # val = int(np.log10(val))
                    y.append(val)
            else:
                if val is None:
                    x.append(0)
                else:
                    x.append(val)
        # add 1 training example to the mix
        if not ignore:
            X.append(x)

    print(len(X), len(y))
    # print(len(set(y)))
    # print(min(y), max(y))
    # print(get_fib(max(y)))
    y = np.digitize(y, get_fib(max(y)))
    # for i in range(len(inds)):
    #     print(inds[i])
    # from sklearn.preprocessing import scale
    # y = scale(y)
    # for i in range(len(y)):
    #     print(y[i])
    random_forest(X, y)

    # cov = np.cov(np.transpose(X))
    # from decimal import Decimal
    # temp = []
    # for i in range(len(cov)):
    #     a = []
    #     for j in range(len(cov[0])):
    #         a.append(round(Decimal(cov[i][j]), 2))

n_bins = 9
binned_data_array = np.zeros([n_lwt, n_months, n_sites, n_days, n_bins])

# 8 edges from 0 to 70 -> np.digitize returns indices 0..8, i.e. 9 possible bins
bins = np.linspace(0, 70, 8)
print('bins', bins)
time.sleep(5)
for i_lwt in range(n_lwt):
    for i_month in range(n_months):
        for i_site in range(n_sites):
            for i_day in range(n_days):
                data_to_bin = (lwt_array_monthly[i_lwt, i_month, i_site, i_day])
                #print(' D2B',(data_to_bin))
                #print('non-nan', data_to_bin[~np.isnan(data_to_bin)])
                inds = np.digitize(data_to_bin, bins)
                #print('inds', inds)
                #print( i_lwt, i_month, i_site, i_day, inds)
                add_count = 1
                binned_data_array[i_lwt,i_month,i_site,i_day,inds] = add_count

for i_lwt in range(n_lwt):
    print('lwt = ', i_lwt)
    for i_bin in range(n_bins):
        print( 'bin', i_bin)
        print(np.count_nonzero(binned_data_array[i_lwt,:,:,:,i_bin]))    
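The nested loops above place a single 1 in whichever bin each daily value falls into. The same digitize-and-count idea can be written more compactly on a synthetic array; the shapes and names below are placeholders, not the lwt data:

import numpy as np

rng = np.random.default_rng(1)
values = rng.uniform(0, 70, size=(3, 4))   # e.g. a (site, day) block of data
bins = np.linspace(0, 70, 8)               # 8 edges -> indices 0..8 -> 9 bins
n_bins = len(bins) + 1

inds = np.digitize(values, bins)           # vectorised: one bin index per value
one_hot = np.zeros(values.shape + (n_bins,))
np.put_along_axis(one_hot, inds[..., None], 1, axis=-1)

# per-bin counts over the whole synthetic block
print(one_hot.reshape(-1, n_bins).sum(axis=0))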
        



def getWeightqt_postVFP_Wplus(y, pt):
    biny = np.digitize(np.array([y]), yBins)[0] - 1
    binpt = np.digitize(np.array([pt]), qtBins)[0] - 1
    return h[biny, binpt]
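getWeightqt_postVFP_Wplus relies on yBins, qtBins and the 2-D weight table h being defined at module level elsewhere. A self-contained sketch of the same digitize-lookup pattern with placeholder edges and weights (names such as get_weight and all values here are illustrative only):

import numpy as np

# Placeholder binning and weight table; the real yBins/qtBins/h live elsewhere.
yBins = np.linspace(-2.5, 2.5, 6)
qtBins = np.linspace(0.0, 100.0, 11)
h = np.arange((len(yBins) - 1) * (len(qtBins) - 1), dtype=float).reshape(
        len(yBins) - 1, len(qtBins) - 1)

def get_weight(y, pt):
    # Same pattern as above: digitize each coordinate, then shift to 0-based indices.
    biny = np.digitize(np.array([y]), yBins)[0] - 1
    binpt = np.digitize(np.array([pt]), qtBins)[0] - 1
    return h[biny, binpt]

print(get_weight(0.3, 27.0))   # -> 22.0 for this placeholder table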