def continuous_components(delta_X, delta_Y, delta_t, t, T, K):
       
    p = np.arange(K)
    
    delta_xp = np.take(delta_X, p)
    delta_yp = np.take(delta_Y, p)
    delta_tp = np.take(delta_t, p)
    tp = np.take(t, p)
    tp = np.hstack( ( np.array([0]) , tp ) )
    
    first_term_xi = np.cumsum(delta_X[0:K-1])
    second_term_xi = (delta_X[1:K]/delta_t[1:K]) * np.cumsum(delta_t[0:K-1])
    xi = np.hstack( ( np.array([0]), first_term_xi - second_term_xi ) )
    
    first_term_delta = np.cumsum(delta_Y[0:K-1])
    second_term_delta = (delta_Y[1:K]/delta_t[1:K]) * np.cumsum(delta_t[0:K-1])
    delta = np.hstack( ( np.array([0]), first_term_delta - second_term_delta ) )
        
    A0 = (1/T)*np.sum( (delta_xp/(2*delta_tp) * (np.square(tp[1:K+1]) - np.square(tp[0:K]))) + \
                       xi * (tp[1:K+1] - tp[0:K]))

    C0 = (1/T)*np.sum( (delta_yp/(2*delta_tp) * (np.square(tp[1:K+1]) - np.square(tp[0:K]))) + \
                       delta * (tp[1:K+1] - tp[0:K]))
    
    return A0, C0
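A minimal usage sketch (an assumption, not from the original source): the deltas are per-segment differences along a closed contour, t is the cumulative arc length, T its total, and K the number of segments.

import numpy as np

# Hypothetical closed contour: a unit circle sampled into 64 segments.
theta = np.linspace(0.0, 2.0 * np.pi, 65)
X, Y = np.cos(theta), np.sin(theta)
delta_X, delta_Y = np.diff(X), np.diff(Y)
delta_t = np.sqrt(delta_X ** 2 + delta_Y ** 2)   # segment lengths
t = np.cumsum(delta_t)                           # cumulative arc length
T, K = t[-1], len(delta_X)

A0, C0 = continuous_components(delta_X, delta_Y, delta_t, t, T, K)
# If this follows the usual Kuhl-Giardina convention, (X[0] + A0, Y[0] + C0)
# should approximate the contour centroid, here roughly (0, 0).
print(A0, C0)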
	def __init__(self, fname, group, DM_lim):
		chain = hdf5io.TChain(fname, '%s/clouds' % group)
		
		mu_range = np.linspace(DM_lim[0], DM_lim[1], chain.get_nDim())
		
		self.lnp = chain.get_lnp()[0, 1:]
		lnp_min, lnp_max = np.percentile(self.lnp, [10., 90.])
		self.color = (self.lnp - lnp_min) / (lnp_max - lnp_min)
		self.color[self.color > 1.] = 1.
		self.color[self.color < 0.] = 0.
		
		# Plot all paths
		self.N_clouds = chain.get_nDim() // 2
		self.N_paths = chain.get_nSamples()
		mu_tmp = np.cumsum(chain.get_samples(0)[:,:self.N_clouds], axis=1)
		EBV_tmp = np.cumsum(np.exp(chain.get_samples(0)[:,self.N_clouds:]), axis=1)
		
		self.mu_all = np.zeros((self.N_paths, 2*(self.N_clouds+1)), dtype='f8')
		self.EBV_all = np.zeros((self.N_paths, 2*(self.N_clouds+1)), dtype='f8')
		self.mu_all[:,0] = mu_range[0]
		self.mu_all[:,1:-1:2] = mu_tmp
		self.mu_all[:,2:-1:2] = mu_tmp
		self.mu_all[:,-1] = mu_range[-1]
		self.EBV_all[:,2:-1:2] = EBV_tmp
		self.EBV_all[:,3::2] = EBV_tmp
Example #3
def cumsum(v,strict=False):
    if not strict:
        return np.cumsum(v,axis=0)
    else:
        out = np.zeros_like(v)
        out[1:] = np.cumsum(v[:-1],axis=0)
        return out
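A quick sanity check of the two modes (a sketch; the strict variant is an exclusive prefix sum that keeps a leading zero):

import numpy as np

v = np.array([1, 2, 3, 4])
print(cumsum(v))               # [ 1  3  6 10]
print(cumsum(v, strict=True))  # [0 1 3 6]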
Example #4
def pos_neg_integral(scores):
    """Works only for 1D arrays at the moment, but can be easily extended."""
    scores = np.hstack([[0], scores])  # Padding.
    pos_scores, neg_scores = scores.copy(), scores.copy()
    idxs = scores >= 0
    pos_scores[~idxs], neg_scores[idxs] = 0, 0
    return np.cumsum(pos_scores), np.cumsum(neg_scores)
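A small sketch of the padding behaviour: both outputs start at 0 and are one element longer than the input.

import numpy as np

scores = np.array([1.0, -2.0, 3.0])
pos, neg = pos_neg_integral(scores)
print(pos)  # [0. 1. 1. 4.]
print(neg)  # [ 0.  0. -2. -2.]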
Example #5
def rcumsum(v,strict=False):
    if not strict:
        return np.cumsum(v[::-1],axis=0)[::-1]
    else:
        out = np.zeros_like(v)
        out[:-1] = np.cumsum(v[-1:0:-1],axis=0)[::-1]
        return out
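And the reversed counterpart, which accumulates from the right (suffix sums); again a sketch assuming the function above is in scope.

import numpy as np

v = np.array([1, 2, 3])
print(rcumsum(v))               # [6 5 3]
print(rcumsum(v, strict=True))  # [5 3 0]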
Example #6
    def histogram(self, idx=None, critval=None):
        '''calculate histogram values

        does not do any plotting
        '''
        if self.mcres.ndim == 2:
            if idx is not None:
                mcres = self.mcres[:,idx]
            else:
                raise ValueError('currently only 1 statistic at a time')
        else:
            mcres = self.mcres

        if critval is None:
            histo = np.histogram(mcres, bins=10)
        else:
            # pad the critical values with -inf/inf so both tails get a bin
            bins = np.asarray(critval, dtype=float)
            if not bins[0] == -np.inf:
                bins = np.r_[-np.inf, bins]
            if not bins[-1] == np.inf:
                bins = np.r_[bins, np.inf]
            histo = np.histogram(mcres, bins=bins)

        self.histo = histo
        self.cumhisto = np.cumsum(histo[0])*1./self.nrepl
        self.cumhistoreversed = np.cumsum(histo[0][::-1])[::-1]*1./self.nrepl
        return histo, self.cumhisto, self.cumhistoreversed
Example #7
def drawPrfastscore(tp,fp,scr,tot,show=True):
    tp=numpy.cumsum(tp)
    fp=numpy.cumsum(fp)
    rec=tp/tot
    prec=tp/(fp+tp)
    #dif=numpy.abs(prec[1:]-rec[1:])
    dif=numpy.abs(prec[::-1]-rec[::-1])
    pos=dif.argmin()
    pos=len(dif)-pos-1
    ap=0
    for t in numpy.linspace(0,1,11):
        pr=prec[rec>=t]
        if pr.size==0:
            pr=0
        p=numpy.max(pr)
        ap=ap+p/11
    if show:    
        pylab.plot(rec,prec,'-g')
        pylab.title("AP=%.3f EPRthr=%.3f"%(ap,scr[pos]))
        pylab.xlabel("Recall")
        pylab.ylabel("Precision")
        pylab.grid()
        pylab.show()
        pylab.draw()
    return rec,prec,scr,ap,scr[pos]
def single_threshold_otsu(img, mask=None):
    lval = 256
    # compute normalized histogram
    hist = cv2.calcHist([img], [0], mask, [256], [0,256])
    cumHist = np.cumsum(hist)
    norm_hist = hist.ravel()/cumHist[255]
    cumSum = np.cumsum(np.array(norm_hist), dtype=float)
    m = [norm_hist[i] * i for i in range(len(norm_hist))]
    means = np.array(m)
    cumMean = np.cumsum(means, dtype=float)
    mg =cumMean[lval - 1]
    global_variance = 0
    for i in range(len(norm_hist)):
        global_variance += ((i - mg)**2 * norm_hist[i])
    between_class_variance = np.zeros(256)
    max_variance = -1
    for k in range(1, lval - 2):
        p1 = cumSum[k]
        p2 = 1 - p1
        m1 = (1/p1) * cumMean[k] if p1 > 0 else 0
        m2 = (1/p2) * (cumMean[lval - 1] - cumMean[k]) if p2 > 0 else 0
        between_class_variance[k] = (p1 * (m1 - mg)**2) + (p2 * (m2 - mg)**2)
        if between_class_variance[k] > max_variance:
            max_variance = between_class_variance[k]
    th = 0
    maxs = 0
    # find max variances and sum for averages if more than 1 for each
    for i in range(1, lval - 1):
        if between_class_variance[i] == max_variance:
            th += i
            maxs += 1
    th = th / maxs
    return th
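A hedged usage sketch on a synthetic two-level image (the test data below is made up for illustration):

import numpy as np
import cv2

rng = np.random.default_rng(0)
dark = rng.integers(20, 60, size=(64, 64), dtype=np.uint8)
bright = rng.integers(180, 220, size=(64, 64), dtype=np.uint8)
img = np.hstack([dark, bright])

th = single_threshold_otsu(img)
print("threshold ~", th)  # expected to land between the two intensity modes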
Example #9
def Bh_Bv_timeseries(igramsFile):
  h5file = h5py.File(igramsFile)
  igramList = h5file['interferograms'].keys()
  Bh_igram=[]
  Bv_igram=[]
  for igram in igramList:
      Bh_igram.append(float(h5file['interferograms'][igram].attrs['H_BASELINE_TOP_HDR']))
      Bv_igram.append(float(h5file['interferograms'][igram].attrs['V_BASELINE_TOP_HDR']))


  A,B=design_matrix(h5file)
  tbase,dateList,dateDict,dateList1 = date_list(h5file)
  dt = np.diff(tbase)

  Bh_rate=np.dot(np.linalg.pinv(B),Bh_igram)
  zero = np.array([0.],np.float32)
  Bh = np.concatenate((zero,np.cumsum([Bh_rate*dt])))

  Bv_rate=np.dot(np.linalg.pinv(B),Bv_igram)
  zero = np.array([0.],np.float32)
  Bv = np.concatenate((zero,np.cumsum([Bv_rate*dt])))

  h5file.close()

  return Bh,Bv
def run(pars):

    verbose = pars.get('verbose', False)
    data    = pars.get('data')['samples']

    t = np.round(pars.get('target', 5)) # target sample size per option
    s = pars.get('s', 1.)               # continue scale factor

    counts_A = np.cumsum((data==0))
    counts_B = np.cumsum((data==1))

    p_sample_A = 1. / (1. + np.exp((counts_A + 1 - t) * s))
    p_sample_B = 1. / (1. + np.exp((counts_B + 1 - t) * s))

    p_sample_A = p_sample_A * (data==0) + (1 - p_sample_B) * p_sample_A * (data==1)
    p_sample_B = p_sample_B * (data==1) + (1 - p_sample_A) * p_sample_B * (data==0)

    p_sample_A = np.concatenate(([0.5], p_sample_A))
    p_sample_B = np.concatenate(([0.5], p_sample_B))

    p_stop = 1 - (p_sample_A + p_sample_B)

    return {'p_stop': p_stop,
            'p_sample_A': p_sample_A,
            'p_sample_B': p_sample_B}
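A minimal call sketch (the keys mirror the pars.get lookups above; the sample sequence itself is made up): data['samples'] codes a draw from option A as 0 and from option B as 1.

import numpy as np

pars = {
    'data': {'samples': np.array([0, 1, 0, 0, 1, 1, 0, 1])},
    'target': 3,   # target sample size per option
    's': 1.0,
}
out = run(pars)
print(out['p_stop'])       # stopping probability after each observed sample
print(out['p_sample_A'])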
Example #11
def _additive_estimate(events, timeline, _additive_f, _additive_var, reverse):
    """
    Called to compute the Kaplan Meier and Nelson-Aalen estimates.

    """
    if reverse:
        events = events.sort_index(ascending=False)
        at_risk = events['entrance'].sum() - events['removed'].cumsum().shift(1).fillna(0)

        deaths = events['observed']

        estimate_ = np.cumsum(_additive_f(at_risk, deaths)).sort_index().shift(-1).fillna(0)
        var_ = np.cumsum(_additive_var(at_risk, deaths)).sort_index().shift(-1).fillna(0)
    else:
        deaths = events['observed']
        at_risk = events['at_risk']
        estimate_ = np.cumsum(_additive_f(at_risk, deaths))
        var_ = np.cumsum(_additive_var(at_risk, deaths))

    timeline = sorted(timeline)
    estimate_ = estimate_.reindex(timeline, method='pad').fillna(0)
    var_ = var_.reindex(timeline, method='pad')
    var_.index.name = 'timeline'
    estimate_.index.name = 'timeline'

    return estimate_, var_
Example #12
def traj_ss(lon1, lat1, lon2, lat2):
    '''
    Trajectory skill score, from Liu and Weisberg, 2011
    '''

    # distance between drifters in time
    dist = get_dist(lon1, lon2, lat1, lat2) # in time

    # distance along path for control case, which is taken as lon1, lat1
    # first cumsum is to make length distance traveled up to that index
    length = np.cumsum(get_dist(lon1[:,:-1], lon1[:,1:], lat1[:,:-1], lat1[:,1:]), axis=1)

    # calculate s using cumulative sums
    # the first entry in time would be divided by zero, so this starts at the 2nd step
    # second cumsum is to sum up distances traveled
    s = np.cumsum(dist[:,1:], axis=1)/np.cumsum(length, axis=1)    

    # # pdb.set_trace()
    # # calculate skill score based on n=1
    # ind = (s>1)
    # ss = 1-s
    # ss[ind] = 0.

    # Return s instead of skill score so n parameter can be different
    return s
Example #13
def boxfilter(I, r):
    """Fast box filter implementation.

    Parameters
    ----------
    I:  a single channel/gray image data normalized to [0.0, 1.0]
    r:  window radius

    Return
    -----------
    The filtered image data.
    """
    M, N = I.shape
    dest = np.zeros((M, N))

    # cumulative sum over Y axis
    sumY = np.cumsum(I, axis=0)
    # difference over Y axis
    dest[:r + 1] = sumY[r: 2 * r + 1]
    dest[r + 1:M - r] = sumY[2 * r + 1:] - sumY[:M - 2 * r - 1]
    dest[-r:] = np.tile(sumY[-1], (r, 1)) - sumY[M - 2 * r - 1:M - r - 1]

    # cumulative sum over X axis
    sumX = np.cumsum(dest, axis=1)
    # difference over X axis
    dest[:, :r + 1] = sumX[:, r:2 * r + 1]
    dest[:, r + 1:N - r] = sumX[:, 2 * r + 1:] - sumX[:, :N - 2 * r - 1]
    dest[:, -r:] = np.tile(sumX[:, -1][:, None], (1, r)) - \
        sumX[:, N - 2 * r - 1:N - r - 1]

    return dest
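A brief check that the cumulative-sum trick above matches a brute-force window sum over a clipped (2r+1) x (2r+1) neighbourhood (a sketch, not part of the original code):

import numpy as np

rng = np.random.default_rng(1)
I = rng.random((12, 15))
r = 2
fast = boxfilter(I, r)

# brute-force reference: sum over the window, clipped at the image borders
slow = np.zeros_like(I)
M, N = I.shape
for i in range(M):
    for j in range(N):
        slow[i, j] = I[max(0, i - r):i + r + 1, max(0, j - r):j + r + 1].sum()

print(np.allclose(fast, slow))  # expected True for this border handling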
Example #14
    def rvs(self, n=1):
        """Generate random samples from the model.

        Parameters
        ----------
        n : int
            Number of samples to generate.

        Returns
        -------
        obs : array_like, length `n`
            List of samples
        """

        startprob_pdf = self.startprob
        startprob_cdf = np.cumsum(startprob_pdf)
        transmat_pdf = self.transmat
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        # Initial state.
        rand = np.random.rand()
        currstate = (startprob_cdf > rand).argmax()
        obs = [self._generate_sample_from_state(currstate)]

        for x in xrange(n-1):
            rand = np.random.rand()
            currstate = (transmat_cdf[currstate] > rand).argmax()
            obs.append(self._generate_sample_from_state(currstate))

        return np.array(obs)
    def cumulative_moment(self, year, mag):
        '''Calculation of Mmax using a Cumulative Moment approach, adapted from
        the cumulative strain energy method of Makropoulos & Burton (1983)
        :param year: Year of Earthquake
        :type year: numpy.ndarray
        :param mag: Magnitude of Earthquake
        :type mag: numpy.ndarray
        :keyword iplot: Include cumulative moment plot
        :type iplot: Boolean
        :return mmax: Returns Maximum Magnitude
        :rtype mmax: Float
        '''
        # Calculate seismic moment
        m_o = 10. ** (9.05 + 1.5 * mag)
        year_range = np.arange(np.min(year), np.max(year) + 1, 1)
        nyr = np.shape(year_range)[0]
        morate = np.zeros(nyr, dtype=float)
        # Get moment release per year
        for loc, tyr in enumerate(year_range):
            idx = np.abs(year - tyr) < 1E-5
            if np.sum(idx) > 0:
                # Some moment release in that year
                morate[loc] = np.sum(m_o[idx])
        ave_morate = np.sum(morate) / nyr

        # Average moment rate vector
        exp_morate = np.cumsum(ave_morate * np.ones(nyr))
        modiff = (np.abs(np.max(np.cumsum(morate) - exp_morate)) +
                  np.abs(np.min(np.cumsum(morate) - exp_morate)))
        # Return back to Mw
        if fabs(modiff) < 1E-20:
            return -np.inf
        mmax = (2. / 3.) * (np.log10(modiff) - 9.05)
        return mmax
Example #16
 def __init__(self, jetP4s, jetResolutions, massT=172.0, massW=80.4):
     self.Wm2, self.Tm2 = massW ** 2, massT ** 2
     self.rawJ = jetP4s
     self.invJ = 1.0 / np.array(jetResolutions)
     self.fit()
     _, self.fitW, self.fitT = np.cumsum(self.fitJ)
     _, self.rawW, self.rawT = np.cumsum(self.rawJ)
Example #17
    def __init__(self, b, bResolution, mu, nuXY, nuErr, massT=172.0, widthT=widthTop, massW=80.4, zPlus=True):

        for key, val in zip(
            ["", "XY", "Z", "E", "T2", "T", "Phi"],
            [mu, np.array([mu.x(), mu.y()]), mu.z(), mu.e(), mu.Perp2(), mu.Pt(), mu.Phi()],
        ):
            setattr(self, "mu" + key, val)

        for key, val in zip(
            ["massW2", "massT", "invT", "bound", "sign", "rawB", "nuXY", "fitNu"],
            [massW ** 2, massT, 1.0 / widthT, False, [-1, 1][zPlus], b, nuXY, utils.LorentzV()],
        ):
            setattr(self, key, val)

        self.bXY = np.array([b.x(), b.y()])

        eig, self.Einv = np.linalg.eig(nuErr)
        self.E = self.Einv.T
        self.inv = 1.0 / np.append([bResolution], np.sqrt(np.maximum(1, eig)))

        self.setFittedNu(nuXY)
        _, self.rawW, self.rawT = np.cumsum([mu, self.fitNu, self.rawB])

        self.residualsBSLT = self.fit()
        self.chi2 = self.residualsBSLT.dot(self.residualsBSLT)
        _, self.fitW, self.fitT = np.cumsum([mu, self.fitNu, self.fitB])
Example #18
File: Util.py Project: omosola/APGL
    def random2Choice(V, n=1):
        """
        Make a random binary choice from a vector V of values which are unnormalised
        probabilities. Return the corresponding index. For example if v = [1, 2]
        then the probabilities of the indices are respectively [1/3, 2/3]. The
        parameter n is the number of random choices to make. If V is a matrix,
        then the rows are taken as probabilities, and a choice is made for each
        row.
        """
        Parameter.checkClass(V, numpy.ndarray)

        if V.ndim == 1 and V.shape[0] != 2:
            raise ValueError("Function only works on binary probabilities")
        if V.ndim == 2 and V.shape[1] != 2:
            raise ValueError("Function only works on binary probabilities")

        if V.ndim == 1:
            cumV = numpy.cumsum(V)
            p = numpy.random.rand(n)*cumV[-1]
            cumV2 = numpy.ones(n)*cumV[0] - p
            return numpy.array(cumV2 <= 0, int)
        elif V.ndim == 2:
            cumV = numpy.cumsum(V, 1)
            P = numpy.random.rand(V.shape[0], n)*numpy.array([cumV[:, -1]]).T
            cumV2 = numpy.outer(cumV[:, 0], numpy.ones(n)) - P
            return numpy.array(cumV2 <= 0, int)
        else:
            raise ValueError("Invalid number of dimensions")
def get_blotter_pnl(order_qty, filled_qty, filled_price, cum_position, data, drawdown):
    #import pdb; pdb.set_trace()
    mid = midpoint(data)
    cash = np.sum(filled_qty * filled_price) * (-1.0)
    open_cash = cum_position[-1] * mid[-1]
    pnl = cash + open_cash
    pnl_t = np.cumsum(cum_position[:-1] * np.diff(mid))
    spread = np.cumsum((mid - filled_price) * filled_qty)
    pnl_t = spread[1:] + pnl_t
    assert abs(pnl - pnl_t[-1]) < 0.01

    running_max = np.maximum.accumulate(pnl_t)
    idx = np.where(pnl_t - running_max < drawdown)[0]
    if len(idx) > 0:
        stop_idx = np.min(idx)
        cum_position[(stop_idx+1):] = 0.0
        pnl_t = np.cumsum(cum_position[:-1] * np.diff(mid))
        order_qty[(stop_idx+1):] = 0.0
        filled_qty[(stop_idx+1):] = 0.0
        spread = np.cumsum((mid - filled_price) * filled_qty)
        pnl_t = spread[1:] + pnl_t

    order_volume = np.sum(np.abs(order_qty))
    trade_volume = np.sum(np.abs(filled_qty))

    
    result = np.array([(pnl_t[-1], np.min(pnl_t), np.max(pnl_t),
                        np.min(cum_position), np.max(cum_position), trade_volume,
                        order_volume, trade_volume * 1.0 / order_volume)],
                      dtype = [('total_pnl', 'f'), ('min_pnl', 'f'),
                               ('max_pnl', 'f'), ('min_position', int),
                               ('max_position', int), ('volume', int),
                               ('order_volume', int), ('fill_ratio', float)])
    return result
def kittler(in_arr):
    """
    The reimplementation of Kittler-Illingworth Thresholding algorithm:
    https://www.mathworks.com/matlabcentral/fileexchange/45685
    Paper: [Kittler and Illingworth 1986] Minimum error thresholding.

    Args:
        in_arr(numpy.ndarray): Input 8-bits array.
    Returns:
        t(int): Calculated threshold.
    """
    h, g = np.histogram(in_arr.ravel(), 256, [0, 256])
    h = h.astype(float)
    g = g.astype(float)
    g = g[:-1]

    c = np.cumsum(h)
    m = np.cumsum(h * g)
    s = np.cumsum(h * g**2)
    sigma_f = np.sqrt(s/c - (m/c)**2)

    cb = c[-1] - c
    mb = m[-1] - m
    sb = s[-1] - s
    sigma_b = np.sqrt(sb/cb - (mb/cb)**2)

    p = c / c[-1]
    v = p * np.log(sigma_f) + (1-p)*np.log(sigma_b) - p*np.log(p) - \
        (1-p)*np.log(1-p)
    v[~np.isfinite(v)] = np.inf
    idx = np.argmin(v)
    t = g[idx]

    return t
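A usage sketch on synthetic bimodal data (the array below is made up; the function expects 8-bit values):

import numpy as np

rng = np.random.default_rng(42)
low = rng.normal(60, 10, size=5000)
high = rng.normal(190, 10, size=5000)
arr = np.clip(np.r_[low, high], 0, 255).astype(np.uint8)

t = kittler(arr)
print("Kittler threshold:", t)  # expected to fall between the two modes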
Example #21
File: Util.py Project: omosola/APGL
    def randomChoice(V, n=1):
        """
        Make a random choice from a vector V of values which are unnormalised
        probabilities. Return the corresponding index. For example if v = [1, 2, 4]
        then the probabilities of the indices are respectively [1/7, 2/7, 4/7]. The
        parameter n is the number of random choices to make. If V is a matrix,
        then the rows are taken as probabilities, and a choice is made for each
        row. 
        """
        Parameter.checkClass(V, numpy.ndarray)

        if V.shape[0]==0:
            return -1 

        if V.ndim == 1:
            cumV = numpy.cumsum(V)
            p = numpy.random.rand(n)*cumV[-1]
            return numpy.searchsorted(cumV, p)
        elif V.ndim == 2:
            cumV = numpy.cumsum(V, 1)
            P = numpy.random.rand(V.shape[0], n)*numpy.array([cumV[:, -1]]).T

            inds = numpy.zeros(P.shape, int)
            for i in range(P.shape[0]):
                inds[i, :] = numpy.searchsorted(cumV[i, :], P[i, :])

            return inds
        else:
            raise ValueError("Invalid number of dimensions")
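The core trick in both helpers is the same: take a cumulative sum of the unnormalised weights and locate uniform draws in it with searchsorted. A standalone sketch of that idea (not APGL code):

import numpy

V = numpy.array([1.0, 2.0, 4.0])           # unnormalised probabilities
cumV = numpy.cumsum(V)                     # [1., 3., 7.]
draws = numpy.random.rand(10000) * cumV[-1]
idx = numpy.searchsorted(cumV, draws)      # 0, 1, 2 with probs ~1/7, 2/7, 4/7
print(numpy.bincount(idx, minlength=3) / float(idx.size))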
Example #22
    def tests(self, distribution='exp', pdelete=0., independent=True, dither=0., tilewindow=1.0):
        
        assert distribution in ['exp', 'exponential', 'poisson', 'regular']
        samplerate = 0.1 # ms
        spikerate = 0.001 # firing rate
        nspikes = 100 # number of spikes to test
        if distribution in ['exp', 'exponential']:
            st1 = np.random.exponential(1./spikerate, nspikes)
            st1 = np.cumsum(st1)
        elif distribution == 'regular':
            st1 = np.linspace(int(10./samplerate),
                int(9000./samplerate), int(10./samplerate))
        elif distribution == 'poisson':
            st1 = np.random.poisson(1./spikerate, nspikes)
            st1 = np.cumsum(st1)
        
        if independent:
            st2 = np.random.exponential(1./spikerate, nspikes)
            st2 = np.cumsum(st2)
        else:
            st2 = st1
        st2 = np.random.choice(st2,
                    int((1.0-pdelete)*st1.shape[0]), replace=False)
        if dither > 0:
            st2 = st2 + np.random.randn(len(st2))*dither
#        print('len st1, st2: ', len(st1), len(st2), np.max(st1), np.max(st2))
        self.set_spikes(samplerate, st1, st2, tilewindow=tilewindow)
        sttc = self.calc_sttc()
        print('# of spikes in spike train 1: {0:d}, in spike train 2: {1:d} '.format(st1.shape[0], st2.shape[0]))
        print('STTC value: {0:.3f} '.format(sttc))
        self.plot_sttc(st1, st2)
Example #23
    def _divide(self):        
        block_size = self.spec.block_size # shortcut
        half_block = (block_size - 1) // 2
        
        rows, columns = self.dividing.nonzero()
        for i in range(len(rows)):
            row = rows[i]
            column = columns[i]

            write_block(self._cell_block, self.cells, row, column, block_size)
            cv2.filter2D(self._cell_block, cv2.CV_32F, self._tension_kernel,
                         self._probability, borderType=cv2.BORDER_CONSTANT)
            cv2.threshold(self._probability, self._tension_min, 0, 
                          cv2.THRESH_TOZERO, self._probability)
            self._probability[self._cell_block] = 0
            self._probability **= self.spec.tension_power
            self._probability *= self._distance_kernel
            
            # optimized version of np.random.choice
            np.cumsum(self._probability.flat, out=self._cumulative)
            total = self._cumulative[-1]
            if total < 1.0e-12:
                # no viable placements, we'll have precision problems anyways
                continue 
            self._cumulative /= total
            
            index = self._indices[np.searchsorted(self._cumulative, 
                                                  rdm.random())]
            local_row, local_column = np.unravel_index(index, 
                                                       self._probability.shape)
            self.set_alive(row+(local_row-half_block), 
                           column+(local_column-half_block))
Example #24
def windower(thing, max_radius):
    thing_pad = numpy.concatenate((
        thing[-max_radius:], thing, thing[:max_radius]
        ))
    thing_sum = numpy.cumsum(numpy.cumsum(thing_pad))
    
    return (len(thing), thing_sum, max_radius) 
    def _generate_sample(self, X, nn_data, nn_num, row, col, step):
        """Generate a synthetic sample with an additional steps for the
        categorical features.

        Each new sample is generated the same way than in SMOTE. However, the
        categorical features are mapped to the most frequent nearest neighbors
        of the majority class.
        """
        rng = check_random_state(self.random_state)
        sample = super(SMOTENC, self)._generate_sample(X, nn_data, nn_num,
                                                       row, col, step)
        # To avoid conversion and since there is only few samples used, we
        # convert those samples to dense array.
        sample = (sample.toarray().squeeze()
                  if sparse.issparse(sample) else sample)
        all_neighbors = nn_data[nn_num[row]]
        all_neighbors = (all_neighbors.toarray()
                         if sparse.issparse(all_neighbors) else all_neighbors)

        categories_size = ([self.continuous_features_.size] +
                           [cat.size for cat in self.ohe_.categories_])

        for start_idx, end_idx in zip(np.cumsum(categories_size)[:-1],
                                      np.cumsum(categories_size)[1:]):
            col_max = all_neighbors[:, start_idx:end_idx].sum(axis=0)
            # tie breaking argmax
            col_sel = rng.choice(np.flatnonzero(
                np.isclose(col_max, col_max.max())))
            sample[start_idx:end_idx] = 0
            sample[start_idx + col_sel] = 1

        return sparse.csr_matrix(sample) if sparse.issparse(X) else sample
Example #26
    def _major_index_fancy(self, idx):
        """Index along the major axis where idx is an array of ints.
        """
        idx_dtype = self.indices.dtype
        indices = np.asarray(idx, dtype=idx_dtype).ravel()

        _, N = self._swap(self.shape)
        M = len(indices)
        new_shape = self._swap((M, N))
        if M == 0:
            return self.__class__(new_shape)

        row_nnz = np.diff(self.indptr)
        idx_dtype = self.indices.dtype
        res_indptr = np.zeros(M+1, dtype=idx_dtype)
        np.cumsum(row_nnz[idx], out=res_indptr[1:])

        nnz = res_indptr[-1]
        res_indices = np.empty(nnz, dtype=idx_dtype)
        res_data = np.empty(nnz, dtype=self.dtype)
        csr_row_index(M, indices, self.indptr, self.indices, self.data,
                      res_indices, res_data)

        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape, copy=False)
Example #27
    def _major_slice(self, idx, copy=False):
        """Index along the major axis where idx is a slice object.
        """
        if idx == slice(None):
            return self.copy() if copy else self

        M, N = self._swap(self.shape)
        start, stop, step = idx.indices(M)
        M = len(xrange(start, stop, step))
        new_shape = self._swap((M, N))
        if M == 0:
            return self.__class__(new_shape)

        row_nnz = np.diff(self.indptr)
        idx_dtype = self.indices.dtype
        res_indptr = np.zeros(M+1, dtype=idx_dtype)
        np.cumsum(row_nnz[idx], out=res_indptr[1:])

        if step == 1:
            all_idx = slice(self.indptr[start], self.indptr[stop])
            res_indices = np.array(self.indices[all_idx], copy=copy)
            res_data = np.array(self.data[all_idx], copy=copy)
        else:
            nnz = res_indptr[-1]
            res_indices = np.empty(nnz, dtype=idx_dtype)
            res_data = np.empty(nnz, dtype=self.dtype)
            csr_row_slice(start, stop, step, self.indptr, self.indices,
                          self.data, res_indices, res_data)

        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape, copy=False)
Example #28
    def SNfunc(self,data,sig,significancefloor=0.5):
        D=data.ravel()
        S=sig.ravel()

        args=numpy.argsort(-D/S)
        D=numpy.take(D,args)
        S=numpy.take(S,args)
        Dsum=numpy.cumsum(D)
        Ssum=numpy.cumsum(S**2)**0.5
        SN=(Dsum/Ssum).max()

        #regional SN
        import scipy.ndimage as ndimage
        data[data/sig<significancefloor]=0
        masks, multiplicity = ndimage.measurements.label(data)
        labels=numpy.arange(1, multiplicity+1)
        SNs=numpy.zeros(multiplicity+1)
        SNs[0]=SN
        for i in range(multiplicity):
            D=data[masks==i+1].ravel()
            S=sig[masks==i+1].ravel()
            args=numpy.argsort(-D/S)
            D=numpy.take(D,args)
            S=numpy.take(S,args)
            Dsum=numpy.cumsum(D)
            Ssum=numpy.cumsum(S**2)**0.5
            SNi=(Dsum/Ssum).max()
            SNs[i+1]=SNi
        SNs=-numpy.sort(-SNs)
        return SNs
Example #29
    def resize(self, *shape):
        shape = check_shape(shape)
        if hasattr(self, 'blocksize'):
            bm, bn = self.blocksize
            new_M, rm = divmod(shape[0], bm)
            new_N, rn = divmod(shape[1], bn)
            if rm or rn:
                raise ValueError("shape must be divisible into %s blocks. "
                                 "Got %s" % (self.blocksize, shape))
            M, N = self.shape[0] // bm, self.shape[1] // bn
        else:
            new_M, new_N = self._swap(shape)
            M, N = self._swap(self.shape)

        if new_M < M:
            self.indices = self.indices[:self.indptr[new_M]]
            self.data = self.data[:self.indptr[new_M]]
            self.indptr = self.indptr[:new_M + 1]
        elif new_M > M:
            self.indptr = np.resize(self.indptr, new_M + 1)
            self.indptr[M + 1:].fill(self.indptr[M])

        if new_N < N:
            mask = self.indices < new_N
            if not np.all(mask):
                self.indices = self.indices[mask]
                self.data = self.data[mask]
                major_index, val = self._minor_reduce(np.add, mask)
                self.indptr.fill(0)
                self.indptr[1:][major_index] = val
                np.cumsum(self.indptr, out=self.indptr)

        self._shape = shape
Example #30
    def __init__(self, lc, voltage, t_tot, t_anchoring, pretilt=0,
                 totaltwist=0, nlayers=100, data_file=None):

        self.lc = lc
        self.t_tot = t_tot
        self.t_anchoring = t_anchoring
        self.pretilt = pretilt
        self.totaltwist = totaltwist
        self.nlayers = nlayers
        self.data_file = data_file
        # thicknesses of internal layers
        tlc_internal = (self.t_tot - 2. * self.t_anchoring) / \
            (self.nlayers - 2.) * numpy.ones(self.nlayers - 2)
        # thicknesses of layers
        self.tlc = numpy.r_[self.t_anchoring, tlc_internal, self.t_anchoring]
        # internal sample points
        lhs = numpy.r_[0, numpy.cumsum(tlc_internal)]
        # normalized sample points: at the center of internal layers, plus the
        # boundaries (i.e. the anchoring layers)
        self.normalized_sample_points = numpy.r_[
            0, (lhs[1:] + lhs[:-1]) / 2. / (self.t_tot - 2 * self.t_anchoring),
            1]
        tmp = numpy.r_[0, numpy.cumsum(self.tlc)]
        self.sample_points = .5 * (tmp[1:] + tmp[:-1])
        # finally, apply voltage
        self.voltage = voltage
Example #31
def _process_estimation_data(df, state_space, optim_paras, options):
    """Process estimation data.

    All necessary objects for :func:`_internal_log_like_obs` dependent on the data are
    produced.

    Some objects have to be repeated for each type which is a desirable format for the
    estimation where every observation is weighted by type probabilities.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame which contains the data used for estimation. The DataFrame
        contains individual identifiers, periods, experiences, lagged choices, choices
        in current period, the wage and other observed data.
    state_space : ~respy.state_space.StateSpace
    optim_paras : dict

    Returns
    -------
    choices : numpy.ndarray
        Array with shape (n_observations, n_types) where information is only repeated
        over the second axis.
    idx_indiv_first_obs : numpy.ndarray
        Array with shape (n_individuals,) containing indices for the first observations
        of each individual.
    indices : numpy.ndarray
        Array with shape (n_observations, n_types) containing indices for states which
        correspond to observations.
    log_wages_observed : numpy.ndarray
        Array with shape (n_observations, n_types) containing clipped log wages.
    type_covariates : numpy.ndarray
        Array with shape (n_individuals, n_type_covariates) containing covariates to
        predict probabilities for each type.

    """
    labels, _ = generate_column_labels_estimation(optim_paras)

    df = df.sort_values(["Identifier", "Period"])[labels]
    df = df.rename(columns=lambda x: x.replace("Experience", "exp").lower())
    df = convert_choice_variables_from_categorical_to_codes(df, optim_paras)

    # Get indices of states in the state space corresponding to all observations for all
    # types. The indexer has the shape (n_observations, n_types).
    indices = ()

    for period in range(df.period.max() + 1):
        period_df = df.query("period == @period")

        period_experience = tuple(
            period_df[col].to_numpy()
            for col in period_df.filter(like="exp_").columns)
        period_lagged_choice = tuple(
            period_df[f"lagged_choice_{i}"].to_numpy()
            for i in range(1, optim_paras["n_lagged_choices"] + 1))
        period_observables = tuple(
            period_df[observable].to_numpy()
            for observable in optim_paras["observables"])

        period_indices = state_space.indexer[period][period_experience +
                                                     period_lagged_choice +
                                                     period_observables]

        indices += (period_indices, )

    indices = np.row_stack(indices)

    # The indexer is now sorted in period-individual pairs whereas the estimation needs
    # individual-period pairs. Sort it!
    indices_to_reorder = (df.sort_values(
        ["period",
         "identifier"]).assign(__index__=np.arange(df.shape[0])).sort_values(
             ["identifier", "period"])["__index__"].to_numpy())
    indices = indices[indices_to_reorder]

    # Get an array of positions of the first observation for each individual. This is
    # used in :func:`_internal_log_like_obs` to aggregate probabilities of the
    # individual over all periods.
    n_obs_per_indiv = np.bincount(df.identifier.to_numpy())
    idx_indiv_first_obs = np.hstack((0, np.cumsum(n_obs_per_indiv)[:-1]))

    # For the estimation, log wages are needed with shape (n_observations, n_types).
    log_wages_observed = (np.log(df.wage.to_numpy()).clip(
        -HUGE_FLOAT, HUGE_FLOAT).repeat(optim_paras["n_types"]))

    # For the estimation, choices are needed with shape (n_observations * n_types).
    choices = df.choice.to_numpy().repeat(optim_paras["n_types"])

    # For the type covariates, we only need the first observation of each individual.
    states = df.groupby("identifier").first()
    type_covariates = (create_type_covariates(states, optim_paras, options)
                       if optim_paras["n_types"] > 1 else None)

    return choices, idx_indiv_first_obs, indices, log_wages_observed, type_covariates
# h(i)
h = np.zeros(101)
for i in range(H1, H2 + 1):
    for j in range(W1, W2 + 1):
        L, u, v = LuvMatrix[i, j]
        if (L > maxL):
            L = 100
        elif (L < minL):
            L = 0
        else:
            L = math.floor(L)
        LuvMatrix[i, j] = [L, u, v]
        h[L] += 1

fs = np.cumsum(h)  # Frequency sum
fi1 = np.zeros(101)  # (f(i) + f(i-1))/2
for i in range(0, 101):
    if (i == 0):
        fi1[i] = math.floor(fs[i] * 101 / (2 * fs[100]))
    else:
        fi1[i] = math.floor((fs[i - 1] + fs[i]) * 101 / (2 * fs[100]))

HSOutput = inputImage.copy()
# Histogram equalization and conversions
for i in range(H1, H2 + 1):
    for j in range(W1, W2 + 1):
        # Histogram equalization in Luv Domain
        L, u, v = LuvMatrix[i, j]
        L = fi1[int(L)]
def hist_skip(inFname,
              bandIndex,
              percentileMin,
              percentileMax,
              outFname,
              nbuckets=1000):
    """
  Given a filename, finds approximate percentile values and provides the
  gdal_translate invocation required to create an 8-bit PNG.
  Works by evaluating a histogram of the original raster with a large number of
  buckets between the raster minimum and maximum, then estimating the
  probability mass and distribution functions before reporting the percentiles
  requested.
  N.B. This technique is very approximate and hasn't been checked for asymptotic
  convergence. Heck, it uses GDAL's `GetHistogram` function in approximate mode,
  so you're getting approximate percentiles using an approximated histogram.
  Arguments:
  - `percentileMin`, `percentileMax`: percentiles, between 0 and 100 (inclusive).
  - `nbuckets` (optional): the more buckets, the better percentile approximations you get.
  """
    src = gdal.Open(inFname)
    band = src.GetRasterBand(int(bandIndex))
    percentiles = [float(percentileMin), float(percentileMax)]
    # Use GDAL to find the min and max
    (lo, hi, avg, std) = band.GetStatistics(True, True)

    # Use GDAL to calculate a big histogram
    rawhist = band.GetHistogram(min=lo, max=hi, buckets=nbuckets)
    binEdges = np.linspace(lo, hi, nbuckets + 1)

    # Probability mass function. Trapezoidal-integration of this should yield 1.0.
    pmf = rawhist / (np.sum(rawhist) * np.diff(binEdges[:2]))
    # Cumulative probability distribution. Starts at 0, ends at 1.0.
    distribution = np.cumsum(pmf) * np.diff(binEdges[:2])

    # Which histogram buckets are close to the percentiles requested?
    idxs = [np.sum(distribution < p / 100.0) for p in percentiles]
    # These:
    vals = [binEdges[i] for i in idxs]

    # Append 0 and 100% percentiles (min & max)
    percentiles = [0] + percentiles + [100]
    vals = [lo] + vals + [hi]

    # Print the percentile table
    print "percentile (out of 100%),value at percentile"
    for (p, v) in zip(percentiles, vals):
        print "%f,%f" % (p, v)

    if vals[1] == 0:
        print "percentile " + str(percentileMin) + " is equal to 0"
        print "Percentile recomputation as pNoZero+" + str(
            percentileMin
        ) + ", where pNoZero is the first percentile with no zero value"

        pNoZero = 0
        for p in range(int(percentileMin), 100):
            idx = np.sum(distribution < float(p) / 100.0)
            val = binEdges[idx]
            if val > 0:
                pNoZero = p + int(percentileMin)
                break
        percentiles = [float(pNoZero), float(percentileMax)]
        # Which histogram buckets are close to the percentiles requested?
        idxs = [np.sum(distribution < p / 100.0) for p in percentiles]
        # These:
        vals = [binEdges[i] for i in idxs]

        # Append 0 and 100% percentiles (min & max)
        percentiles = [0] + percentiles + [100]
        vals = [lo] + vals + [hi]
        # Print the percentile table
        print "percentile (out of 100%),value at percentile"
        for (p, v) in zip(percentiles, vals):
            print "%f,%f" % (p, v)

    # Print out gdal_calc command
    gdalCalcCommand = "gdal_calc.py -A " + inFname + " --A_band=" + bandIndex + " --calc=" + '"' + str(
        vals[1]) + "*logical_and(A>0, A<=" + str(vals[1]) + ")+A*(A>" + str(
            vals[1]
        ) + ")" + '"' + " --outfile=gdal_calc_result.tif --NoDataValue=0"
    print "running  " + gdalCalcCommand
    os.system(gdalCalcCommand)

    # Print out gdal_translate command (what we came here for anyway)
    gdalTranslateCommand = "gdal_translate -b 1 -co TILED=YES -co BLOCKXSIZE=512 -co BLOCKYSIZE=512 -co ALPHA=YES -ot Byte -a_nodata 0 -scale " + str(
        vals[1]) + " " + str(
            vals[2]) + " 1 255 gdal_calc_result.tif " + outFname
    print "running  " + gdalTranslateCommand
    os.system(gdalTranslateCommand)

    # remove temp file
    os.system("rm gdal_calc_result.tif")

    return (vals, percentiles)
Example #34
def plotkpi(kpi, filePath):
    # kpi = kpi_frame; filePath = analysis_path
    rst_path = get_path(filePath)

    # turnover-days plot
    xlabeln = u'周转天数'
    ylabeln = u'频数'
    ylabel2n = u'周转天数累积分布'
    titlen = u'周转分布'
    fig, ax1 = plt.subplots()
    ret = plt.hist(kpi.TD, bins=50, range=[0.1, 200], color='#0070C0')  # drop special SKUs here: zero values and those above 200
    counts, bins, patches = ret[0], ret[1], ret[2]
    ax2 = ax1.twinx()
    sum_counts = np.cumsum(counts) / counts.sum()
    plt.plot(bins[1:], sum_counts, color='#C44E52')
    ax1.set_xlabel(xlabeln)
    ax1.set_ylabel(ylabeln)
    ax2.set_ylabel(ylabel2n)
    ax1.set_ylim(-counts.max() * 0.05, counts.max() * 1.05)
    ax1.set_xlim(-10, 200 * 1.05)
    ax2.set_ylim(-0.05, 1.05)
    ax2.yaxis.grid(False)
    plt.savefig(rst_path + '\\td')
    # second version
    binsn = list(range(0, 100, 10)) + [np.inf]
    save_path = rst_path + '\\td2'
    plothistper(kpi[(kpi.TD.notnull())&(kpi.TD != np.inf)&(kpi.TD != -np.inf)]["TD"], binsn, xlabeln, ylabeln, titlen, save_path, cum_True=True)

    # in-stock rate plot
    xlabeln = u'现货率'
    ylabeln = u'频数'
    ylabel2n = u'现货率累积分布'
    titlen = u'现货率分布'
    fig, ax1 = plt.subplots()
    ret = plt.hist(kpi.CR, bins=50, label='Z', color='#0070C0')
    counts, bins, patches = ret[0], ret[1], ret[2]
    ax2 = ax1.twinx()
    sum_counts = np.cumsum(counts) / counts.sum()
    plt.plot(bins[1:], sum_counts, color='#C44E52')
    ax1.set_xlabel(xlabeln)
    ax1.set_ylabel(ylabeln)
    ax2.set_ylabel(ylabel2n)
    ax1.set_ylim(-counts.max() * 0.05, counts.max() * 1.05)
    ax1.set_xlim(-0.05, 1.05)
    ax2.set_ylim(-0.05, 1.05)
    ax2.yaxis.grid(False)
    plt.savefig(rst_path+'\\cr')
    # second version
    binsn = np.linspace(0,1,11)
    save_path = rst_path + '\\cr2'
    plothistper(kpi[(kpi.CR.notnull())&(kpi.CR != np.inf)&(kpi.CR != -np.inf)]["CR"], binsn, xlabeln, ylabeln, titlen, save_path, cum_True=True, intshu=False)

    # in-stock rate vs. turnover-days plot
    fig2 = plt.figure()
    ax = fig2.add_subplot(111)
    # ax.grid()
    ax.set_xlabel(u"周转天数")
    ax.set_ylabel(u"现货率")
    ax.set_xlim(0, 200)
    ax.set_ylim(0, 1)
    plt.scatter(kpi.TD, kpi.CR, color='#0070C0')
    plt.plot([60, 60], [0, 1], '--', color='red')
    plt.plot([0, 200], [0.8, 0.8], '--', color='red')
    plt.annotate('(1)', xy=(1, 1), xytext=(25, 0.9), fontsize=20, color='red')
    plt.annotate('(2)', xy=(1, 1), xytext=(130, 0.9), fontsize=20, color='red')
    plt.annotate('(3)', xy=(1, 1), xytext=(25, 0.4), fontsize=20, color='red')
    plt.annotate('(4)', xy=(1, 1), xytext=(130, 0.4), fontsize=20, color='red')
    plt.savefig(rst_path+'\\cr_td')
Example #35
def PlotHypsometry(hypsometer):

    zbins = hypsometer['z'].values
    areas = hypsometer['area'].values

    fig = plt.figure(1, facecolor='white', figsize=(6.25, 3.5))
    gs = plt.GridSpec(100, 150, bottom=0.15, left=0.1, right=1.0, top=1.0)

    nodata_area = areas[0]
    z = zbins[1:]
    areas = areas[1:]

    ax = fig.add_subplot(gs[10:95, 40:140])
    cum_areas = np.flip(np.cumsum(np.flip(areas, axis=0)), axis=0)
    total_area = np.sum(areas)

    # if hypsometry:
    #     ax.fill_between(100 *cum_areas / total_area, 0, z, color='#f2f2f2')
    #     ax.plot(100 * cum_areas / total_area, z, color='k', linestyle='--', linewidth=0.6)

    ax.fill_between(100 * cum_areas / total_area, 0, z, color='lightgray')
    ax.plot(100 * cum_areas / total_area, z, color='k', linewidth=1.0)

    minz = np.min(z[areas > 0])
    maxz = np.max(z[areas > 0])
    dz = 100.0

    ax.spines['top'].set_linewidth(1)
    ax.spines['left'].set_linewidth(1)
    ax.spines['right'].set_linewidth(1)
    ax.spines['bottom'].set_linewidth(1)
    ax.set_xlabel("Cumulative surface (%)")
    ax.set_xlim([0, 100])
    ax.set_ylim(minz, maxz)
    ax.tick_params(axis='both', width=1, pad=2)

    for tick in ax.xaxis.get_major_ticks():
        tick.set_pad(2)

    z = np.arange(minz, maxz + dz, dz)
    groups = np.digitize(zbins[:-1], z)

    ax = fig.add_subplot(gs[10:95, 10:30])

    # if hypsometry:
    #     grouped_hyp = np.array([np.sum(areas[groups == k]) for k in range(1, z.size)])
    #     ax.barh(z[:-1], 100.0 * grouped_hyp / total_area, dz, align='edge', color='#f2f2f2', edgecolor='k')

    grouped = np.array([np.sum(areas[groups == k]) for k in range(1, z.size)])
    ax.barh(z[:-1],
            100.0 * grouped / total_area,
            dz,
            align='edge',
            color='lightgray',
            edgecolor='k')

    ax.spines['top'].set_linewidth(1)
    ax.spines['left'].set_linewidth(1)
    ax.spines['right'].set_linewidth(1)
    ax.spines['bottom'].set_linewidth(1)
    ax.set_xlabel("Surface (%)")
    ax.set_ylim(minz, maxz)
    ax.set_ylabel("Altitude (m)")
    ax.tick_params(axis='both', width=1, pad=2)

    for tick in ax.xaxis.get_major_ticks():
        tick.set_pad(2)

    fig_size_inches = 12.50
    aspect_ratio = 2.0
    cbar_L = "None"
    [fig_size_inches, map_axes, cbar_axes] = MapFigureSizer(fig_size_inches,
                                                            aspect_ratio,
                                                            cbar_loc=cbar_L,
                                                            title="None")

    fig.set_size_inches(fig_size_inches[0], fig_size_inches[1])

    return fig
Example #36
print("------ GPTS stationary ------")
(stationary_rewards, stationary_final_environment,
 stationary_final_subcampaign_algos, regression_errors_max,
 regression_errors_sum) = experiment.perform(timesteps_stationary)

print("------ GPTS context generation ------")
(context_generation_rewards, context_generation_final_environment,
 context_generation_final_subcampaign_algos) = experiment.perform(
     timesteps_context_generation, context_generation_rate)

############################################
## Plot results
############################################

cumulative_stationary_reward = np.cumsum(stationary_rewards)
cumulative_context_generation_reward = np.cumsum(context_generation_rewards)
cumulative_clairvoyant_reward = np.cumsum(clairvoyant_rewards)
cumulative_disaggregated_clairvoyant_reward = np.cumsum(
    disaggregated_clairvoyant_rewards)

# Cumulative rewards

plt.plot(cumulative_stationary_reward)
plt.plot(cumulative_context_generation_reward)
plt.plot(cumulative_clairvoyant_reward)
plt.plot(cumulative_disaggregated_clairvoyant_reward)
plt.legend([
    'GPTS - Stationary', 'GPTS - Context generation', 'Clairvoyant',
    'Clairvoyant - Optimal context'
],
 def __init__(self, dist):
     red = np.cumsum(list(dist['red'].values()))
     green = np.cumsum(list(dist['green'].values()))
     blue = np.cumsum(list(dist['blue'].values()))
     self.dist = {'red':red, 'green':green, 'blue':blue}
def moving_average(a, n=3) :
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n
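A quick check of the cumulative-sum moving average above (output has length len(a) - n + 1):

import numpy as np

a = np.array([1., 2., 3., 4., 5.])
print(moving_average(a, n=3))  # [2. 3. 4.]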
Example #39
    def accumulate(self, p=None):
        '''
        Accumulate per image evaluation results and store the result in self.eval
        :param p: input params for evaluation
        :return: None
        '''
        print('Accumulating evaluation results...')
        tic = time.time()
        if not self.evalImgs:
            print('Please run evaluate() first')
        # allows input customized parameters
        if p is None:
            p = self.params
        p.catIds = p.catIds if p.useCats == 1 else [-1]
        T = len(p.iouThrs)
        R = len(p.fppiThrs)
        K = len(p.catIds) if p.useCats else 1
        M = len(p.maxDets)
        # -1 for the precision of absent categories
        ys = -np.ones((T, R, K, M))

        # create dictionary for future indexing
        _pe = self._paramsEval
        catIds = [1]  # _pe.catIds if _pe.useCats else [-1]
        setK = set(catIds)
        setM = set(_pe.maxDets)
        setI = set(_pe.imgIds)
        # get inds to evaluate
        k_list = [n for n, k in enumerate(p.catIds) if k in setK]

        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
        i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
        I0 = len(_pe.imgIds)

        # retrieve E at each category, area range, and max number of detections
        for k, k0 in enumerate(k_list):
            Nk = k0 * I0
            for m, maxDet in enumerate(m_list):
                E = [self.evalImgs[Nk + i] for i in i_list]
                E = [e for e in E if e is not None]
                if len(E) == 0:
                    continue

                dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])

                # different sorting method generates slightly different results.
                # mergesort is used to be consistent as Matlab implementation.

                inds = np.argsort(-dtScores, kind='mergesort')

                dtm = np.concatenate([e['dtMatches'][:, 0:maxDet] for e in E],
                                     axis=1)[:, inds]
                dtIg = np.concatenate([e['dtIgnore'][:, 0:maxDet] for e in E],
                                      axis=1)[:, inds]
                gtIg = np.concatenate([e['gtIgnore'] for e in E])
                npig = np.count_nonzero(gtIg == 0)
                if npig == 0:
                    continue
                tps = np.logical_and(dtm, np.logical_not(dtIg))
                fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
                # print(tps,fps)
                inds = np.where(dtIg == 0)[1]
                # print(inds)
                tps = tps[:, inds]
                fps = fps[:, inds]
                # print(tps,fps)
                tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
                fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
                # print(tp_sum,fp_sum)
                for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                    tp = np.array(tp)
                    fppi = np.array(fp) / I0
                    nd = len(tp)
                    recall = tp / npig
                    q = np.zeros((R, ))

                    # numpy is slow without cython optimization for accessing elements
                    # use python array gets significant speed improvement
                    recall = recall.tolist()
                    q = q.tolist()

                    for i in range(nd - 1, 0, -1):
                        if recall[i] < recall[i - 1]:
                            recall[i - 1] = recall[i]

                    inds = np.searchsorted(fppi, p.fppiThrs, side='right') - 1
                    try:
                        for ri, pi in enumerate(inds):
                            q[ri] = recall[pi]
                    except BaseException:
                        pass
                    ys[t, :, k, m] = np.array(q)
        self.eval = {
            'params': p,
            'counts': [T, R, K, M],
            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'TP': ys,
        }
        toc = time.time()
        print('DONE (t={:0.2f}s).'.format(toc - tic))
Example #40
def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=True):
    """rec, prec, ap = voc_eval(detpath,
                           annopath,
                           imagesetfile,
                           classname,
                           [ovthresh],
                           [use_07_metric])
Top level function that does the PASCAL VOC evaluation.
detpath: Path to detections
   detpath.format(classname) should produce the detection results file.
annopath: Path to annotations
   annopath.format(imagename) should be the xml annotations file.
imagesetfile: Text file containing the list of images, one image per line.
classname: Category name (duh)
cachedir: Directory for caching the annotations
[ovthresh]: Overlap threshold (default = 0.5)
[use_07_metric]: Whether to use VOC07's 11 point AP computation
   (default True)
"""
# assumes detections are in detpath.format(classname)
# assumes annotations are in annopath.format(imagename)
# assumes imagesetfile is a text file with each line an image name
# cachedir caches the annotations in a pickle file
# first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]
    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath % (imagename))
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames)))
        # save
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            pickle.dump(recs, f)
    else:
        # load
        with open(cachefile, 'rb') as f:
            recs = pickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()
    if any(lines):

        splitlines = [x.strip().split(' ') for x in lines]
        image_ids = [x[0] for x in splitlines]
        confidence = np.array([float(x[1]) for x in splitlines])
        BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

        # sort by confidence
        sorted_ind = np.argsort(-confidence)
        sorted_scores = np.sort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

        # go down dets and mark TPs and FPs
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        for d in range(nd):
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            ovmax = -np.inf
            BBGT = R['bbox'].astype(float)
            if BBGT.size > 0:
                # compute overlaps
                # intersection
                ixmin = np.maximum(BBGT[:, 0], bb[0])
                iymin = np.maximum(BBGT[:, 1], bb[1])
                ixmax = np.minimum(BBGT[:, 2], bb[2])
                iymax = np.minimum(BBGT[:, 3], bb[3])
                iw = np.maximum(ixmax - ixmin, 0.)
                ih = np.maximum(iymax - iymin, 0.)
                inters = iw * ih
                uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) +
                       (BBGT[:, 2] - BBGT[:, 0]) *
                       (BBGT[:, 3] - BBGT[:, 1]) - inters)
                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)

            if ovmax > ovthresh:
                if not R['difficult'][jmax]:
                    if not R['det'][jmax]:
                        tp[d] = 1.
                        R['det'][jmax] = 1
                    else:
                        fp[d] = 1.
            else:
                fp[d] = 1.

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # avoid divide by zero in case the first detection matches a difficult
        # ground truth
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)
    else:
        rec = -1.
        prec = -1.
        ap = -1.

    return rec, prec, ap
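The precision/recall step above boils down to two running sums over detections sorted by descending confidence; a toy standalone illustration (not VOC data):

import numpy as np

# 1 = true positive, 0 = false positive, already sorted by confidence
tp = np.array([1., 1., 0., 1., 0.])
fp = 1. - tp
npos = 4  # hypothetical number of ground-truth boxes

tp, fp = np.cumsum(tp), np.cumsum(fp)
rec = tp / float(npos)
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
print(rec)   # [0.25 0.5  0.5  0.75 0.75]
print(prec)  # [1.  1.  0.667  0.75  0.6 ] (approximately)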
Example #41
fig, ax = plt.subplots(figsize=(5, 3))
fig.subplots_adjust(bottom=0.15, left=0.2)
ax.plot(x1, y1)
ax.set_xlabel('time [s]', fontsize='large', fontweight='bold')
ax.set_ylabel('Damped oscillation [V]', fontproperties=font)

plt.show()

##############################################################################
# Finally, we can use native TeX rendering in all text objects and have
# multiple lines:

fig, ax = plt.subplots(figsize=(5, 3))
fig.subplots_adjust(bottom=0.2, left=0.2)
ax.plot(x1, np.cumsum(y1**2))
ax.set_xlabel('time [s] \n This was a long experiment')
ax.set_ylabel(r'$\int\ Y^2\ dt\ \ [V^2 s]$')
plt.show()


##############################################################################
# Titles
# ======
#
# Subplot titles are set in much the same way as labels, but there is
# the *loc* keyword arguments that can change the position and justification
# from the default value of ``loc=center``.

fig, axs = plt.subplots(3, 1, figsize=(5, 6), tight_layout=True)
locs = ['center', 'left', 'right']
Example #42
print(digit_train.info())

X = digit_train.iloc[:, 1:] / 255.0
y = digit_train['label']

X_train, X_eval, y_train, y_eval = model_selection.train_test_split(
    X, y, test_size=0.1, random_state=1)

zv = feature_selection.VarianceThreshold(threshold=0.0)
X_train1 = zv.fit_transform(X_train)

sns.heatmap(X_train.corr())

lpca = decomposition.PCA(n_components=0.95)
X_train2 = lpca.fit_transform(X_train1)
np.cumsum(lpca.explained_variance_ratio_)

tsne = manifold.TSNE()
tsne_data = tsne.fit_transform(X_train2)
cutils.plot_data_2d_classification(tsne_data, y_train)

kernel_svm_estimator = svm.SVC(kernel='rbf')
kernel_svm_grid = {
    'gamma': [0.01, 0.1, 1, 2, 5, 10],
    'C': [0.001, 0.01, 0.1, 0.5]
}
kernel_svm_grid_estimator = model_selection.GridSearchCV(kernel_svm_estimator,
                                                         kernel_svm_grid,
                                                         scoring='accuracy',
                                                         cv=10)
kernel_svm_grid_estimator.fit(X_train2, y_train)
from mantid.simpleapi import *
import numpy as np
import matplotlib.pyplot as plt
import math

filename = 'CORELLI_666'

ws=LoadEventNexus(Filename=r'/SNS/CORELLI/IPTS-12008/shared/SNS/CORELLI/IPTS-12008/nexus/'+filename+'.nxs.h5',OutputWorkspace=filename,BankName='bank42',SingleBankPixelsOnly='0',FilterByTofMin='0',FilterByTofMax='16667')
LoadInstrument(Workspace=filename,Filename='/SNS/users/rwp/CORELLI_Definition.xml')

sequence = list(map(float,ws.getInstrument().getComponentByName('correlation-chopper').getStringParameter('sequence')[0].split()))
sequence_sum=np.cumsum(sequence)

#i=ws.getInstrument()
#r=i.getDetector(169600).getDistance(i.getSample())

chopper_tdc = ws.getSampleDetails().getProperty("chopper4_TDC").times
#chopper_frq = ws.getSampleDetails().getProperty("BL9:Chop:Skf4:SpeedSetReq").value[0]
chopper_frq = ws.getSampleDetails().getProperty("BL9:Chop:Skf4:MotorSpeed").timeAverageValue()
chopper_per = 1e6/chopper_frq

print('Chopper Frequency =', chopper_frq, 'Hz, Period =', chopper_per, 'uS')

bin_size=10. # 10 microsecond bins
y = int(math.ceil( chopper_per/bin_size ))
x = int(math.ceil( 1e6/60/bin_size ))
out = np.zeros((y,x))
total_counts=0


for pi in range(0,4): #4
Example #44
0
def bayesian_blocks(t, x=None, sigma=None,
                    fitness='events', **kwargs):
    """Bayesian Blocks Implementation
    This is a flexible implementation of the Bayesian Blocks algorithm
    described in Scargle 2012 [1]_
    Parameters
    ----------
    t : array_like
        data times (one dimensional, length N)
    x : array_like (optional)
        data values
    sigma : array_like or float (optional)
        data errors
    fitness : str or object
        the fitness function to use.
        If a string, the following options are supported:
        - 'events' : binned or unbinned event data
            extra arguments are `p0`, which gives the false alarm probability
            to compute the prior, or `gamma` which gives the slope of the
            prior on the number of bins.
        - 'regular_events' : non-overlapping events measured at multiples
            of a fundamental tick rate, `dt`, which must be specified as an
            additional argument.  The prior can be specified through `gamma`,
            which gives the slope of the prior on the number of bins.
        - 'measures' : fitness for a measured sequence with Gaussian errors
            The prior can be specified using `gamma`, which gives the slope
            of the prior on the number of bins.  If `gamma` is not specified,
            then a simulation-derived prior will be used.
        Alternatively, the fitness can be a user-specified object of
        type derived from the FitnessFunc class.
    Returns
    -------
    edges : ndarray
        array containing the (N+1) bin edges
    Examples
    --------
    Event data:
        t = np.random.normal(size=100)
        bins = bayesian_blocks(t, fitness='events', p0=0.01)
    Event data with repeats:
        t = np.random.normal(size=100)
        t[80:] = t[:20]
        bins = bayesian_blocks(t, fitness='events', p0=0.01)
    Regular event data:
        dt = 0.01
        t = dt * np.arange(1000)
        x = np.zeros(len(t))
        x[np.random.randint(0, len(t), int(len(t) / 10))] = 1
        bins = bayesian_blocks(t, x, fitness='regular_events', dt=dt, gamma=0.9)
    Measured point data with errors:
        t = 100 * np.random.random(100)
        x = np.exp(-0.5 * (t - 50) ** 2)
        sigma = 0.1
        x_obs = np.random.normal(x, sigma)
        bins = bayesian_blocks(t, x=x_obs, fitness='measures')
    References
    ----------
    .. [1] Scargle, J `et al.` (2012)
           http://adsabs.harvard.edu/abs/2012arXiv1207.5578S
    See Also
    --------
    astroML.plotting.hist : histogram plotting function which can make use
                            of bayesian blocks.
    """
    # validate array input
    t = np.asarray(t, dtype=float)
    if x is not None:
        x = np.asarray(x)
    if sigma is not None:
        sigma = np.asarray(sigma)

    # verify the fitness function
    if fitness == 'events':
        if x is not None and np.any(x % 1 > 0):
            raise ValueError("x must be integer counts for fitness='events'")
        fitfunc = Events(**kwargs)
    elif fitness == 'regular_events':
        if x is not None and (np.any(x % 1 > 0) or np.any(x > 1)):
            raise ValueError("x must be 0 or 1 for fitness='regular_events'")
        fitfunc = RegularEvents(**kwargs)
    elif fitness == 'measures':
        if x is None:
            raise ValueError("x must be specified for fitness='measures'")
        fitfunc = PointMeasures(**kwargs)
    else:
        if not (hasattr(fitness, 'args') and
                hasattr(fitness, 'fitness') and
                hasattr(fitness, 'prior')):
            raise ValueError("fitness not understood")
        fitfunc = fitness

    # find unique values of t
    t = np.array(t, dtype=float)
    assert t.ndim == 1
    unq_t, unq_ind, unq_inv = np.unique(t, return_index=True,
                                        return_inverse=True)

    # if x is not specified, x will be counts at each time
    if x is None:
        if sigma is not None:
            raise ValueError("If sigma is specified, x must be specified")

        if len(unq_t) == len(t):
            x = np.ones_like(t)
        else:
            x = np.bincount(unq_inv)

        t = unq_t
        sigma = 1

    # if x is specified, then we need to sort t and x together
    else:
        x = np.asarray(x)

        if len(t) != len(x):
            raise ValueError("Size of t and x does not match")

        if len(unq_t) != len(t):
            raise ValueError("Repeated values in t not supported when "
                             "x is specified")
        t = unq_t
        x = x[unq_ind]

    # verify the given sigma value
    N = t.size
    if sigma is not None:
        sigma = np.asarray(sigma)
        if sigma.shape not in [(), (1,), (N,)]:
            raise ValueError('sigma does not match the shape of x')
    else:
        sigma = 1

    # validate the input
    fitfunc.validate_input(t, x, sigma)

    # compute values needed for computation, below
    if 'a_k' in fitfunc.args:
        ak_raw = np.ones_like(x) / sigma / sigma
    if 'b_k' in fitfunc.args:
        bk_raw = x / sigma / sigma
    if 'c_k' in fitfunc.args:
        ck_raw = x * x / sigma / sigma

    # create length-(N + 1) array of cell edges
    edges = np.concatenate([t[:1],
                            0.5 * (t[1:] + t[:-1]),
                            t[-1:]])
    block_length = t[-1] - edges

    # arrays to store the best configuration
    best = np.zeros(N, dtype=float)
    last = np.zeros(N, dtype=int)

    #-----------------------------------------------------------------
    # Start with first data cell; add one cell at each iteration
    #-----------------------------------------------------------------
    for R in range(N):
        # Compute fit_vec : fitness of putative last block (end at R)
        kwds = {}

        # T_k: width/duration of each block
        if 'T_k' in fitfunc.args:
            kwds['T_k'] = block_length[:R + 1] - block_length[R + 1]

        # N_k: number of elements in each block
        if 'N_k' in fitfunc.args:
            kwds['N_k'] = np.cumsum(x[:R + 1][::-1])[::-1]

        # a_k: eq. 31
        if 'a_k' in fitfunc.args:
            kwds['a_k'] = 0.5 * np.cumsum(ak_raw[:R + 1][::-1])[::-1]

        # b_k: eq. 32
        if 'b_k' in fitfunc.args:
            kwds['b_k'] = - np.cumsum(bk_raw[:R + 1][::-1])[::-1]

        # c_k: eq. 33
        if 'c_k' in fitfunc.args:
            kwds['c_k'] = 0.5 * np.cumsum(ck_raw[:R + 1][::-1])[::-1]

        # evaluate fitness function
        fit_vec = fitfunc.fitness(**kwds)

        A_R = fit_vec - fitfunc.prior(R + 1, N)
        A_R[1:] += best[:R]

        i_max = np.argmax(A_R)
        last[R] = i_max
        best[R] = A_R[i_max]

    #-----------------------------------------------------------------
    # Now find changepoints by iteratively peeling off the last block
    #-----------------------------------------------------------------
    change_points = np.zeros(N, dtype=int)
    i_cp = N
    ind = N
    while True:
        i_cp -= 1
        change_points[i_cp] = ind
        if ind == 0:
            break
        ind = last[ind - 1]
    change_points = change_points[i_cp:]
    bins = edges[change_points]
    bins[0] = -np.inf
    bins[-1] = np.inf

    return bins
Example #45
0
    def deg_gen(A, K, D, m, eta, gamma, model_var, s_fun):
        mseed = np.size(np.where(A.flat)) // 2

        k = np.sum(A, axis=1)

        if type(model_var) == tuple:
            mv1, mv2 = model_var
        else:
            mv1, mv2 = model_var, model_var

        if mv1 in ('powerlaw', 'power_law'):
            Fd = D**eta
        elif mv1 in ('exponential', ):
            Fd = np.exp(eta * D)

        if mv2 in ('powerlaw', 'power_law'):
            Fk = K**gamma
        elif mv2 in ('exponential', ):
            Fk = np.exp(gamma * K)

        P = Fd * Fk * np.logical_not(A)
        u, v = np.where(np.triu(np.ones((n, n)), 1))

        b = np.zeros((m, ), dtype=int)

        #        print(mseed)
        #        print(np.shape(u),np.shape(v))
        #        print(np.shape(b))
        #        print(np.shape(A[u,v]))
        #        print(np.shape(np.where(A[u,v])), 'sqishy')
        #        print(np.shape(P), 'squnnaq')

        #b[:mseed] = np.where(A[np.ix_(u,v)])
        b[:mseed] = np.squeeze(np.where(A[u, v]))
        #print(mseed, m)
        for i in range(mseed, m):
            C = np.append(0, np.cumsum(P[u, v]))
            r = np.sum(np.random.random() * C[-1] >= C)
            uu = u[r]
            vv = v[r]
            k[uu] += 1
            k[vv] += 1

            if mv2 in ('powerlaw', 'power_law'):
                Fk[:, uu] = Fk[uu, :] = s_fun(k, k[uu])**gamma
                Fk[:, vv] = Fk[vv, :] = s_fun(k, k[vv])**gamma
            elif mv2 in ('exponential', ):
                Fk[:, uu] = Fk[uu, :] = np.exp(s_fun(k, k[uu]) * gamma)
                Fk[:, vv] = Fk[vv, :] = np.exp(s_fun(k, k[vv]) * gamma)

            P = Fd * Fk

            b[i] = r

            P[u[b[:i]], v[b[:i]]] = P[v[b[:i]], u[b[:i]]] = 0

            A[u[r], v[r]] = A[v[r], u[r]] = 1
            #P[b[u[:i]], b[v[:i]]] = P[b[v[:i]], b[u[:i]]] = 0

            #A[uu,vv] = A[vv,uu] = 1


#        indx = v*n + u
#        indx[b]
#
#        nH = np.zeros((n,n))
#        nH.ravel()[indx[b]]=1
#
#        nG = np.zeros((n,n))
#        nG[ u[b], v[b] ]=1
#        nG = nG + nG.T
#
#        print(np.shape(np.where(A != nG)))
#
#        import pdb
#        pdb.set_trace()

        return A
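# Standalone sketch of the cumulative-sum sampling trick used in the loop above
# (C = np.append(0, np.cumsum(P[u, v])) compared against a uniform draw): an edge
# index is picked with probability proportional to its weight.  The weights below
# are illustrative only.
import numpy as np

weights = np.array([0.1, 0.5, 0.2, 0.2])
cum_w = np.cumsum(weights)                           # [0.1, 0.6, 0.8, 1.0]
x = np.random.random() * cum_w[-1]                   # uniform draw on [0, total weight)
idx = int(np.searchsorted(cum_w, x, side='right'))   # first index whose cumulative weight exceeds x
print(idx)                                           # drawn in proportion to weights[idx]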
Example #46
0
    def build (self, 
               coord_, 
               atype_,
               natoms,
               box, 
               mesh,
               input_dict,
               suffix = '', 
               reuse = None):

        with tf.variable_scope('model_attr' + suffix, reuse = reuse) :
            t_tmap = tf.constant(' '.join(self.type_map), 
                                 name = 'tmap', 
                                 dtype = tf.string)
            t_mt = tf.constant(self.model_type, 
                               name = 'model_type', 
                               dtype = tf.string)

            if self.srtab is not None :
                tab_info, tab_data = self.srtab.get()
                self.tab_info = tf.get_variable('t_tab_info',
                                                tab_info.shape,
                                                dtype = tf.float64,
                                                trainable = False,
                                                initializer = tf.constant_initializer(tab_info, dtype = tf.float64))
                self.tab_data = tf.get_variable('t_tab_data',
                                                tab_data.shape,
                                                dtype = tf.float64,
                                                trainable = False,
                                                initializer = tf.constant_initializer(tab_data, dtype = tf.float64))

        coord = tf.reshape (coord_, [-1, natoms[1] * 3])
        atype = tf.reshape (atype_, [-1, natoms[1]])

        dout \
            = self.descrpt.build(coord_,
                                 atype_,
                                 natoms,
                                 box,
                                 mesh,
                                 davg = self.davg,
                                 dstd = self.dstd,
                                 suffix = suffix,
                                 reuse = reuse)
        dout = tf.identity(dout, name='o_descriptor')

        if self.srtab is not None :
            nlist, rij, sel_a, sel_r = self.descrpt.get_nlist()
            nnei_a = np.cumsum(sel_a)[-1]
            nnei_r = np.cumsum(sel_r)[-1]

        atom_ener = self.fitting.build (dout, 
                                        input_dict, 
                                        natoms, 
                                        bias_atom_e = self.bias_atom_e, 
                                        reuse = reuse, 
                                        suffix = suffix)

        if self.srtab is not None :
            sw_lambda, sw_deriv \
                = op_module.soft_min_switch(atype, 
                                            rij, 
                                            nlist,
                                            natoms,
                                            sel_a = sel_a,
                                            sel_r = sel_r,
                                            alpha = self.smin_alpha,
                                            rmin = self.sw_rmin,
                                            rmax = self.sw_rmax)            
            inv_sw_lambda = 1.0 - sw_lambda
            # NOTICE:
            # atom energy is not scaled, 
            # force and virial are scaled
            tab_atom_ener, tab_force, tab_atom_virial \
                = op_module.tab_inter(self.tab_info,
                                      self.tab_data,
                                      atype,
                                      rij,
                                      nlist,
                                      natoms,
                                      sw_lambda,
                                      sel_a = sel_a,
                                      sel_r = sel_r)
            energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, natoms[0]])
            tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape(tab_atom_ener, [-1])
            atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener
            energy_raw = tab_atom_ener + atom_ener
        else :
            energy_raw = atom_ener

        energy_raw = tf.reshape(energy_raw, [-1, natoms[0]], name = 'o_atom_energy'+suffix)
        energy = tf.reduce_sum(global_cvt_2_ener_float(energy_raw), axis=1, name='o_energy'+suffix)

        force, virial, atom_virial \
            = self.descrpt.prod_force_virial (atom_ener, natoms)

        if self.srtab is not None :
            sw_force \
                = op_module.soft_min_force(energy_diff, 
                                           sw_deriv,
                                           nlist, 
                                           natoms,
                                           n_a_sel = nnei_a,
                                           n_r_sel = nnei_r)
            force = force + sw_force + tab_force

        force = tf.reshape (force, [-1, 3 * natoms[1]], name = "o_force"+suffix)

        if self.srtab is not None :
            sw_virial, sw_atom_virial \
                = op_module.soft_min_virial (energy_diff,
                                             sw_deriv,
                                             rij,
                                             nlist,
                                             natoms,
                                             n_a_sel = nnei_a,
                                             n_r_sel = nnei_r)
            atom_virial = atom_virial + sw_atom_virial + tab_atom_virial
            virial = virial + sw_virial \
                     + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis = 1)

        virial = tf.reshape (virial, [-1, 9], name = "o_virial"+suffix)
        atom_virial = tf.reshape (atom_virial, [-1, 9 * natoms[1]], name = "o_atom_virial"+suffix)

        model_dict = {}
        model_dict['energy'] = energy
        model_dict['force'] = force
        model_dict['virial'] = virial
        model_dict['atom_ener'] = energy_raw
        model_dict['atom_virial'] = atom_virial
        
        return model_dict
Example #47
0
def calc_ks_distance(pd_dataframe, ww, precision, time_start, time_split, time_end, crop=True, autobin=False):
    """
    Calculate the Kolmogorov-Smirnov (KS) distance between the reference period (up to time_split) and the target period

    :param pd_dataframe: input pandas dataframe that is split into target and reference parts
    :param ww: window width
    :param precision: the number of points to evaluate the PDF
    :param time_start: start of series
    :param time_split: where to split the series
    :param time_end: where to stop
    :param crop: set values at half window size at the beginning and end of the time series to NaN
    :param autobin: automatically determine bin number and bin sizes
    :return: the KS distances of each target series time step with respect to the reference, and the sign
    """
    #
    # pd_dataframe = new_data_pd
    # ww = WW
    # precision = precision
    # time_start = XMIN
    # time_split = XSPLIT
    # time_end = XMAX
    # autobin = True


    # work on copies of the input; modifying the original dataframe in place caused issues
    ts_index = pd_dataframe.index
    t_2d_ann_pd = copy(pd_dataframe)
    OUT_overlap_pd = copy(pd.DataFrame(t_2d_ann_pd))
    OUT_overlap_pd[:] = np.NaN
    OUT_overlap_pd_sign = copy(pd.DataFrame(t_2d_ann_pd))
    OUT_overlap_pd_sign[:] = np.NaN
    # get the column keys (lat/lon)
    RowCols = t_2d_ann_pd.keys()

    # RowCol loop; go through the keys of pandas dataframe that are the row and column indices
    for RowCol in RowCols:
        # RowCol = u'0061.00113.0'
        # all
        y_all = t_2d_ann_pd[RowCol].values

        # check if there is data; some pixels are only NA
        if np.isnan(y_all).all():
            continue

        # check if every data record is 0
        if (y_all == 0).all():
            continue

        # reference
        y_ref = t_2d_ann_pd[RowCol].loc[time_start:time_split].values
        if np.isnan(y_ref).all():
            continue

        # target
        y_tar = t_2d_ann_pd[RowCol].loc[time_split:time_end].values

        # target - half window
        y_tar_ww = t_2d_ann_pd[RowCol].loc[str(int(time_split.split('-')[0]) - int(ww / 2)) + '-01-01':time_end]
        y_times_tar_ww = t_2d_ann_pd[RowCol].loc[
                         str(int(time_split.split('-')[0]) - int(ww / 2)) + '-01-01':time_end].index
        if np.isnan(y_tar).all():
            continue

        # get the values for evaluation
        ybnds = x_range(y_all, stretch=1)
        # get the values for evaluation
        if autobin:
            # VERSION 1:
            # for the automatic bin size determination:
            # find the increment of the reference period and use this increment
            # to generate a seq from min to max y value
            _, tmp_bin_cent = np.histogram(y_ref)
            tmp_bin_inc = np.diff(tmp_bin_cent).mean()
            tmp_xseq_steps_n = int(np.diff(ybnds[0:2])[0] / tmp_bin_inc)
            tmp_xseq = np.linspace(ybnds[0], ybnds[1], tmp_xseq_steps_n)
            # --- Version 1 is problematic for HD --- #

            # # VERSION 2:
            # # for the autmoatic bin size determination:
            # # find the increment of the reference period and use this increment
            # # to generate a seq from min to max y value
            # _, tmp_bin_cent = np.histogram(y_all)
            # tmp_bin_inc = np.diff(tmp_bin_cent).mean()
            # tmp_xseq_steps_n = np.diff(ybnds[0:2]) / tmp_bin_inc
            # tmp_xseq = np.linspace(ybnds[0], ybnds[1], tmp_xseq_steps_n)

        else:
            tmp_xseq = np.linspace(ybnds[0], ybnds[1], precision)
            # tmp_xseq_centers = tmp_xseq[0:-1] + ((ybnds[1] - ybnds[0]) / precision)


        ref_h_freq, _ = np.histogram(y_ref, bins=tmp_xseq)
        ref_pdf_list = ref_h_freq / (np.sum(ref_h_freq) * 1.0)
        ref_cdf_list = np.cumsum(ref_pdf_list)

        # tar_h_freq, _ = np.histogram(y_tar, bins=tmp_xseq)
        # tar_pdf_list = tar_h_freq / (np.sum(tar_h_freq) * 1.0)
        # tar_cdf_list = np.cumsum(tar_pdf_list)

        # dist_ks = np.nanmax(np.abs(tar_cdf_list - ref_cdf_list))
        dist_ks, dist_ks_sign = calc_run_ks_hist(time_seq=y_times_tar_ww,
                                                 tar_values=y_tar_ww,
                                                 ref_values=ref_cdf_list,
                                                 ww=ww,
                                                 bin_centers=tmp_xseq)

        # and finally write into complete PD data frame
        OUT_overlap_pd[RowCol] = pd.DataFrame(dist_ks, index=y_times_tar_ww)

        if crop:
            # set values at (index <= XSPLIT and index > time_end - (ww/2)) to NaN
            time_split_year = re.split('-', time_split)[0]
            time_end_year = ts_index.max()
            ind_crop_lw = OUT_overlap_pd.index <= int(time_split_year)
            ind_crop_up = OUT_overlap_pd.index > int(time_end_year - int(ww / 2))
            # OUT_overlap_pd_sign.loc[(ind_crop_lw | ind_crop_up)] = np.NaN
            OUT_overlap_pd.loc[(ind_crop_lw | ind_crop_up)] = np.NaN

    return OUT_overlap_pd #, OUT_overlap_pd_sign
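# Minimal sketch (toy data, hypothetical names) of the histogram-CDF comparison that
# the reference values above feed into calc_run_ks_hist: both samples are binned on
# shared edges, normalised, accumulated with np.cumsum, and the KS distance is the
# largest absolute CDF difference.
import numpy as np

rng = np.random.default_rng(0)
y_ref_toy = rng.normal(0.0, 1.0, size=500)           # reference period sample
y_tar_toy = rng.normal(0.3, 1.0, size=500)           # target period sample

edges = np.linspace(-4.0, 4.0, 41)                   # shared bin edges
ref_h, _ = np.histogram(y_ref_toy, bins=edges)
tar_h, _ = np.histogram(y_tar_toy, bins=edges)
ref_cdf = np.cumsum(ref_h / (np.sum(ref_h) * 1.0))
tar_cdf = np.cumsum(tar_h / (np.sum(tar_h) * 1.0))

dist_ks_toy = np.nanmax(np.abs(tar_cdf - ref_cdf))   # KS distance between the two periods
print(dist_ks_toy)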
Example #48
0
    def matching_gen(A, K, D, m, eta, gamma, model_var):
        K += epsilon

        mseed = np.size(np.where(A.flat)) // 2

        if type(model_var) == tuple:
            mv1, mv2 = model_var
        else:
            mv1, mv2 = model_var, model_var

        if mv1 in ('powerlaw', 'power_law'):
            Fd = D**eta
        elif mv1 in ('exponential', ):
            Fd = np.exp(eta * D)

        if mv2 in ('powerlaw', 'power_law'):
            Fk = K**gamma
        elif mv2 in ('exponential', ):
            Fk = np.exp(gamma * K)

        Ff = Fd * Fk * np.logical_not(A)
        u, v = np.where(np.triu(np.ones((n, n)), 1))

        for ii in range(mseed, m):
            C = np.append(0, np.cumsum(Ff[u, v]))
            r = np.sum(np.random.random() * C[-1] >= C)
            uu = u[r]
            vv = v[r]
            A[uu, vv] = A[vv, uu] = 1

            updateuu, = np.where(np.inner(A, A[:, uu]))
            updateuu = np.delete(updateuu, np.where(updateuu == uu))
            updateuu = np.delete(updateuu, np.where(updateuu == vv))

            c1 = np.append(A[:, uu], A[uu, :])
            for i in range(len(updateuu)):
                j = updateuu[i]
                c2 = np.append(A[:, j], A[j, :])

                use = np.logical_or(c1, c2)
                use[uu] = use[uu + n] = use[j] = use[j + n] = 0
                ncon = np.sum(c1[use]) + np.sum(c2[use])
                if ncon == 0:
                    K[uu, j] = K[j, uu] = epsilon
                else:
                    K[uu, j] = K[j, uu] = (
                        2 / ncon * np.sum(np.logical_and(c1[use], c2[use])) +
                        epsilon)

            updatevv, = np.where(np.inner(A, A[:, vv]))
            updatevv = np.delete(updatevv, np.where(updatevv == uu))
            updatevv = np.delete(updatevv, np.where(updatevv == vv))

            c1 = np.append(A[:, vv], A[vv, :])
            for i in range(len(updatevv)):
                j = updatevv[i]
                c2 = np.append(A[:, j], A[j, :])

                use = np.logical_or(c1, c2)
                use[vv] = use[vv + n] = use[j] = use[j + n] = 0
                ncon = np.sum(c1[use]) + np.sum(c2[use])
                if ncon == 0:
                    K[vv, j] = K[j, vv] = epsilon
                else:
                    K[vv, j] = K[j, vv] = (
                        2 / ncon * np.sum(np.logical_and(c1[use], c2[use])) +
                        epsilon)

            Ff = Fd * Fk * np.logical_not(A)

        return A
    print("tEnd = ", tEnd, ", deltat = ", deltat)
    T = np.arange(tBegin, tEnd, deltat)

    ##     ##
    Q = np.identity(len(X0)) * 0.1
    print(Q)
    X = X0

    LCE_T = []
    norm_T = []
    for t in T:
        X = integrate.odeint(Lor, X, (0, deltat))[-1]
        B = Q + deltat * np.dot(JacLor(t, X), Q)
        Q, R = np.linalg.qr(
            B
        )  # Factor the matrix B as qr, where q is orthonormal and r is upper-triangular.
        LCE_T.append(np.log2(np.abs(np.diag(R))) / deltat)
    LCE_T = np.array(LCE_T)
    ##    ##

    # Discard initial timesteps
    i_init = 10
    LCEv = np.cumsum(LCE_T[i_init:, :], 0) / (T / deltat + deltat)[i_init:,
                                                                   None]
    print("\nSystems's Lyapunov Exponents = ", LCEv[-1])

    # Plot
    plt.plot(T[i_init:], LCEv)
    plt.xlabel("Time")
    plt.show()
Example #50
0
    def clu_gen(A, K, D, m, eta, gamma, model_var, x_fun):
        mseed = np.size(np.where(A.flat)) // 2

        A = A > 0

        if type(model_var) == tuple:
            mv1, mv2 = model_var
        else:
            mv1, mv2 = model_var, model_var

        if mv1 in ('powerlaw', 'power_law'):
            Fd = D**eta
        elif mv1 in ('exponential', ):
            Fd = np.exp(eta * D)

        if mv2 in ('powerlaw', 'power_law'):
            Fk = K**gamma
        elif mv2 in ('exponential', ):
            Fk = np.exp(gamma * K)

        c = clustering_coef_bu(A)
        k = np.sum(A, axis=1)

        Ff = Fd * Fk * np.logical_not(A)
        u, v = np.where(np.triu(np.ones((n, n)), 1))

        #print(mseed, m)
        for i in range(mseed + 1, m):
            C = np.append(0, np.cumsum(Ff[u, v]))
            r = np.sum(np.random.random() * C[-1] >= C)
            uu = u[r]
            vv = v[r]
            A[uu, vv] = A[vv, uu] = 1
            k[uu] += 1
            k[vv] += 1

            bu = A[uu, :].astype(bool)
            bv = A[vv, :].astype(bool)
            su = A[np.ix_(bu, bu)]
            sv = A[np.ix_(bv, bv)]

            bth = np.logical_and(bu, bv)
            c[bth] += 2 / (k[bth]**2 - k[bth])
            c[uu] = np.size(np.where(su.flat)) / (k[uu] * (k[uu] - 1))
            c[vv] = np.size(np.where(sv.flat)) / (k[vv] * (k[vv] - 1))
            c[k <= 1] = 0
            bth[uu] = 1
            bth[vv] = 1

            k_result = x_fun(c, bth)

            #print(np.shape(k_result))
            #print(np.shape(K))
            #print(K)
            #print(np.shape(K[bth,:]))

            K[bth, :] = k_result
            K[:, bth] = k_result.T

            if mv2 in ('powerlaw', 'power_law'):
                Ff[bth, :] = Fd[bth, :] * K[bth, :]**gamma
                Ff[:, bth] = Fd[:, bth] * K[:, bth]**gamma
            elif mv2 in ('exponential', ):
                Ff[bth, :] = Fd[bth, :] * np.exp(K[bth, :] * gamma)
                Ff[:, bth] = Fd[:, bth] * np.exp(K[:, bth] * gamma)

            Ff = Ff * np.logical_not(A)

        return A
Example #51
0
def drawPNL(dtesPnl,
            pnl,
            dtes,
            strategy_name,
            showFigure='no',
            toDatabase='no',
            dateStart=-1,
            pnlType='pnl'):
    db = db_quanLiang()
    if dateStart == -1:
        s1 = np.nonzero(dtes >= dtesPnl[0])[0][0]
    else:
        s1 = np.nonzero(dtes >= dateStart)[0][0]
    #s2 = np.nonzero(dtes<=dtesPnl[-1])[0][-1]
    d = dtes[s1:]
    r = np.zeros(d.shape)
    l = list(d)
    for (i, x) in enumerate(dtesPnl):
        r[l.index(x)] = pnl[i]
    r = np.array(r)
    m = np.mean(r)
    m2 = np.mean(r[r != 0])
    v = np.std(r)
    v2 = np.std(r[r != 0])
    s = m / v * np.sqrt(250)
    ar = np.maximum.accumulate(np.cumsum(r)) - np.cumsum(r)
    md = np.max(ar)
    dailyReturnRate = np.round(m, 5)
    tradeReturnRate = np.round(m2, 5)
    dailyStd = np.round(v, 5)
    tradeStd = np.round(v2, 5)
    sharpe = np.round(s, 2)
    mdd = np.round(md, 5)
    sortino = np.round(m * 250 / mdd, 2)
    if showFigure == 'yes':
        plt.figure()
        plt.plot(dtes2Label(d), np.cumsum(r))
        plt.grid()
        plt.gcf().autofmt_xdate()  # automatically rotate the date tick labels
        plt.title(strategy_name + ' ' + pnlType + ' 策略平均日回报率:' +
                  str(dailyReturnRate * 100) + '%, 平均每笔交易回报率:' +
                  str(tradeReturnRate * 100) + '%,平均波动:' +
                  str(dailyStd * 1e2) + '%, 平均每笔交易波动率:' + str(tradeStd * 1e2) +
                  '%, sharpe值:' + str(sharpe) + ' 最大回撤:' + str(mdd * 100))
    if (toDatabase == 'yes'):
        statisticsInfo = {
            'tag': pnlType,
            '平均日回报率': dailyReturnRate,
            '平均每笔交易回报率': tradeReturnRate,
            '平均日波动率': dailyStd,
            '平均每笔交易波动率': tradeStd,
            'Sharpe值': sharpe,
            '最大回撤': mdd,
            '索提诺比率': sortino
        }
        if pnlType == 'pnl':
            db.strategyBackTest.update_one({'strategy_name': strategy_name}, {
                '$set': {
                    'labels': list([int(x) for x in d]),
                    'pnl': list(np.cumsum(r))
                }
            },
                                           upsert=True)
            db.strategyBackTest.update_one({'strategy_name': strategy_name},
                                           {'$set': {
                                               'statistics': []
                                           }},
                                           upsert=True)
            db.strategyBackTest.update_one(
                {'strategy_name': strategy_name},
                {'$set': {
                    'performance': statisticsInfo
                }},
                upsert=True)
        else:
            db.strategyBackTest.update_one(
                {'strategy_name': strategy_name},
                {'$set': {
                    pnlType: list(np.cumsum(r))
                }},
                upsert=True)
        db.strategyBackTest.update_one(
            {'strategy_name': strategy_name},
            {'$push': {
                'statistics': statisticsInfo
            }})
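# Standalone sketch (toy daily returns) of the drawdown bookkeeping in drawPNL:
# the running peak of the cumulative PnL minus the current cumulative PnL is the
# drawdown curve, and its maximum is the max drawdown (md) reported above.
import numpy as np

r_toy = np.array([0.01, -0.02, 0.015, -0.03, 0.005])
equity = np.cumsum(r_toy)                            # cumulative PnL curve
drawdown = np.maximum.accumulate(equity) - equity    # distance below the running peak
print(drawdown, drawdown.max())                      # max drawdown of the toy series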
Example #52
0
def running_mean(x, N):
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / float(N)
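# Quick usage check for running_mean (toy data, window of 3): the cumulative-sum
# difference gives the same values as a direct sliding-window average.
import numpy as np

x_toy = np.arange(10, dtype=float)
print(running_mean(x_toy, 3))
print(np.convolve(x_toy, np.ones(3) / 3.0, mode='valid'))   # identical result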
def evaluate_matches(matches):
    overlaps = opt.overlaps
    min_region_sizes = [ opt.min_region_sizes[0] ]
    dist_threshes = [ opt.distance_threshes[0] ]
    dist_confs = [ opt.distance_confs[0] ]
    
    # results: class x overlap
    ap = np.zeros( (len(dist_threshes) , len(CLASS_LABELS) , len(overlaps)) , float )
    for di, (min_region_size, distance_thresh, distance_conf) in enumerate(zip(min_region_sizes, dist_threshes, dist_confs)):
        for oi, overlap_th in enumerate(overlaps):
            pred_visited = {}
            for m in matches:
                for p in matches[m]['pred']:
                    for label_name in CLASS_LABELS:
                        for p in matches[m]['pred'][label_name]:
                            if 'filename' in p:
                                pred_visited[p['filename']] = False
            for li, label_name in enumerate(CLASS_LABELS):
                y_true = np.empty(0)
                y_score = np.empty(0)
                hard_false_negatives = 0
                has_gt = False
                has_pred = False
                for m in matches:
                    pred_instances = matches[m]['pred'][label_name]
                    gt_instances = matches[m]['gt'][label_name]
                    # filter groups in ground truth
                    gt_instances = [ gt for gt in gt_instances if gt['instance_id']>=1000 and gt['vert_count']>=min_region_size and gt['med_dist']<=distance_thresh and gt['dist_conf']>=distance_conf ]
                    if gt_instances:
                        has_gt = True
                    if pred_instances:
                        has_pred = True

                    cur_true  = np.ones ( len(gt_instances) )
                    cur_score = np.ones ( len(gt_instances) ) * (-float("inf"))
                    cur_match = np.zeros( len(gt_instances) , dtype=bool )
                    # collect matches
                    for (gti,gt) in enumerate(gt_instances):
                        found_match = False
                        num_pred = len(gt['matched_pred'])
                        for pred in gt['matched_pred']:
                            # greedy assignments
                            if pred_visited[pred['filename']]:
                                continue
                            overlap = float(pred['intersection']) / (gt['vert_count']+pred['vert_count']-pred['intersection'])
                            if overlap > overlap_th:
                                confidence = pred['confidence']
                                # if already have a prediction for this gt,
                                # the prediction with the lower score is automatically a false positive
                                if cur_match[gti]:
                                    max_score = max( cur_score[gti] , confidence )
                                    min_score = min( cur_score[gti] , confidence )
                                    cur_score[gti] = max_score
                                    # append false positive
                                    cur_true  = np.append(cur_true,0)
                                    cur_score = np.append(cur_score,min_score)
                                    cur_match = np.append(cur_match,True)
                                # otherwise set score
                                else:
                                    found_match = True
                                    cur_match[gti] = True
                                    cur_score[gti] = confidence
                                    pred_visited[pred['filename']] = True
                        if not found_match:
                            hard_false_negatives += 1
                    # remove non-matched ground truth instances
                    cur_true  = cur_true [ cur_match==True ]
                    cur_score = cur_score[ cur_match==True ]

                    # collect non-matched predictions as false positive
                    for pred in pred_instances:
                        found_gt = False
                        for gt in pred['matched_gt']:
                            overlap = float(gt['intersection']) / (gt['vert_count']+pred['vert_count']-gt['intersection'])
                            if overlap > overlap_th:
                                found_gt = True
                                break
                        if not found_gt:
                            num_ignore = pred['void_intersection']
                            for gt in pred['matched_gt']:
                                # group?
                                if gt['instance_id'] < 1000:
                                    num_ignore += gt['intersection']
                                # small ground truth instances
                                if gt['vert_count'] < min_region_size or gt['med_dist']>distance_thresh or gt['dist_conf']<distance_conf:
                                    num_ignore += gt['intersection']
                            proportion_ignore = float(num_ignore)/pred['vert_count']
                            # if not ignored append false positive
                            if proportion_ignore <= overlap_th:
                                cur_true = np.append(cur_true,0)
                                confidence = pred["confidence"]
                                cur_score = np.append(cur_score,confidence)

                    # append to overall results
                    y_true  = np.append(y_true,cur_true)
                    y_score = np.append(y_score,cur_score)

                # compute average precision
                if has_gt and has_pred:
                    # compute precision recall curve first

                    # sorting and cumsum
                    score_arg_sort      = np.argsort(y_score)
                    y_score_sorted      = y_score[score_arg_sort]
                    y_true_sorted       = y_true[score_arg_sort]
                    y_true_sorted_cumsum = np.cumsum(y_true_sorted)

                    # unique thresholds
                    (thresholds,unique_indices) = np.unique( y_score_sorted , return_index=True )
                    num_prec_recall = len(unique_indices) + 1

                    # prepare precision recall
                    num_examples      = len(y_score_sorted)
                    num_true_examples = y_true_sorted_cumsum[-1]
                    precision         = np.zeros(num_prec_recall)
                    recall            = np.zeros(num_prec_recall)

                    # deal with the first point
                    y_true_sorted_cumsum = np.append( y_true_sorted_cumsum , 0 )
                    # deal with remaining
                    for idx_res,idx_scores in enumerate(unique_indices):
                        cumsum = y_true_sorted_cumsum[idx_scores-1]
                        tp = num_true_examples - cumsum
                        fp = num_examples      - idx_scores - tp
                        fn = cumsum + hard_false_negatives
                        p  = float(tp)/(tp+fp)
                        r  = float(tp)/(tp+fn)
                        precision[idx_res] = p
                        recall   [idx_res] = r

                    # the last array entry is the artificial first point of the curve (recall=0, precision=1)
                    precision[-1] = 1.
                    recall   [-1] = 0.

                    # compute average of precision-recall curve
                    recall_for_conv = np.copy(recall)
                    recall_for_conv = np.append(recall_for_conv[0], recall_for_conv)
                    recall_for_conv = np.append(recall_for_conv, 0.)

                    stepWidths = np.convolve(recall_for_conv,[-0.5,0,0.5],'valid')
                    # integrate is now simply a dot product
                    ap_current = np.dot(precision, stepWidths)

                elif has_gt:
                    ap_current = 0.0
                else:
                    ap_current = float('nan')
                ap[di,li,oi] = ap_current
    return ap
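# Standalone sketch of the step-width trick used above: convolving the padded recall
# array with [-0.5, 0, 0.5] gives, for each precision sample, half the recall span
# between its two neighbours, so np.dot(precision, stepWidths) integrates the
# precision-recall curve.  The toy arrays below are illustrative only.
import numpy as np

recall_toy = np.array([0.8, 0.6, 0.3, 0.0])       # decreasing, ends at the artificial point
precision_toy = np.array([0.5, 0.7, 0.9, 1.0])

padded = np.concatenate(([recall_toy[0]], recall_toy, [0.0]))
step_widths = np.convolve(padded, [-0.5, 0, 0.5], 'valid')
ap_toy = np.dot(precision_toy, step_widths)
print(step_widths, ap_toy)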
Example #54
0
def drawPriceChange(r,
                    strategy_name,
                    timeLabels,
                    title='priceChange',
                    tp=[240, 604],
                    showGraph='yes'):
    if len(r) == 0:
        return
    db = db_quanLiang()
    dbt = db_tinySoftData()
    r[np.isfinite(r) == False] = 0
    priceChange = np.mean(r, axis=0)
    priceChangeStd = np.std(r, axis=0)
    priceChangeStd[np.isfinite(priceChangeStd) == False] = 0
    if (showGraph == 'yes'):
        plt.figure()
        plt.rcParams['font.sans-serif'] = [u'SimHei']
        plt.rcParams['axes.unicode_minus'] = False
        default_dpi = plt.rcParamsDefault['figure.dpi']
        plt.rcParams['figure.dpi'] = default_dpi * 2
        plt.title('平均价格随时间变化图')
        plt.title(title)
        legends = []
        if np.max(tp) < 600:
            linewidth = 1
        else:
            linewidth = 0.25
        for k in tp:
            plt.plot([k, k], [-0.023, 0.013], linewidth=linewidth)
            legends.append(timeLabels[k])
        plt.plot(np.cumsum(priceChange),
                 'b-',
                 marker="o",
                 linewidth=0.25,
                 markersize=0.25)
        plt.plot(priceChangeStd, 'k', linewidth=0.25, markersize=0.25)
        legends.append('分钟价格回报率变化累积')
        legends.append('分钟价格回报率标准差')
        plt.legend(legends, bbox_to_anchor=(1.0, 1.0), loc=2, borderaxespad=0.)
        plt.grid()
    db.strategyMinuteBar.update_one({'strategy_name': strategy_name}, {
        '$set': {
            'labels': timeLabels,
            title: list(np.cumsum(priceChange)),
            title + 'Std': list(priceChangeStd),
            'concernPoints': tp,
            'priceType': '开盘价',
            '买入时间': timeLabels[tp[0]],
            '卖出时间': timeLabels[tp[1]],
        }
    },
                                    upsert=True)
    if (title == 'priceChange'):
        db.strategyBackTest.update_one(
            {'strategy_name': strategy_name},
            {'$set': {
                '买入时间': timeLabels[tp[0]],
                '卖出时间': timeLabels[tp[1]],
            }},
            upsert=True)
    updateStrategyGeneratingStatus(
        strategy_name, '生成进度:55%。价格聚合分析完成。 ' + str(datetime.datetime.now()),
        55)
Example #55
0
plt.savefig(plotName)
plt.close()

if opts.analysisType == "cbclist":
    bounds = [15, 35]
    xlims = [15.0, 35.0]
    ylims = [1, 100000]

    plotName = "%s/rates.pdf" % (plotDir)
    plt.figure(figsize=(10, 8))
    for ii, model in enumerate(models):
        legend_name = get_legend(model)
        bins, hist1 = lightcurve_utils.hist_results(
            model_tables[model]["peak_appmag_i"], Nbins=25, bounds=bounds)
        hist1_cumsum = float(cbccnt) * hist1 / np.sum(hist1)
        hist1_cumsum = np.cumsum(hist1_cumsum)
        plt.semilogy(bins,
                     hist1_cumsum,
                     '-',
                     color=colors_names[ii],
                     linewidth=3,
                     label=legend_name)
    plt.xlabel(r"Apparent Magnitude [mag]", fontsize=24)
    plt.ylabel("Rate of apparent magnitude [per year]", fontsize=24)
    plt.legend(loc="best", prop={'size': 24})
    plt.xticks(fontsize=24)
    plt.yticks(fontsize=24)
    plt.xlim(xlims)
    #plt.ylim(ylims)
    plt.savefig(plotName)
    plt.close()
Example #56
0
    def _compute_cs(self, folded_data, N):
        sigma = np.std(folded_data)
        m = np.mean(folded_data)
        s = np.cumsum(folded_data - m) * 1.0 / (N * sigma)
        R = np.max(s) - np.min(s)
        return R
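# Usage sketch for _compute_cs: the statistic is the range of the cumulative sum of
# mean-subtracted, sigma-normalised values (a range-of-cumulative-sums feature).
# The folded light curve below is toy data; outside a class the same quantity can be
# computed directly.
import numpy as np

folded_toy = np.sin(np.linspace(0, 2 * np.pi, 100))
N = len(folded_toy)
s = np.cumsum(folded_toy - np.mean(folded_toy)) / (N * np.std(folded_toy))
R = np.max(s) - np.min(s)   # same value _compute_cs would return
print(R)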
scaler = preprocessing.StandardScaler().fit(trainx_filled)
train_x = scaler.transform(trainx_filled)
test_x = scaler.transform(testx_filled)

# In[26]:

train_x

# In[27]:

test_x

# In[28]:

pca = PCA().fit(train_x)
itemindex = np.where(np.cumsum(pca.explained_variance_ratio_) > 0.9999)
print('np.cumsum(pca.explained_variance_ratio_)',
      np.cumsum(pca.explained_variance_ratio_))
#Plotting the Cumulative Summation of the Explained Variance
plt.figure()
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('Number of Components')
plt.ylabel('Variance (%)')  #for each component
plt.title('Principal Components Explained Variance')
plt.show()
pca_std = PCA(n_components=itemindex[0][0]).fit(train_x)
train_x = pca_std.transform(train_x)
test_x = pca_std.transform(test_x)
print(train_x)
print(test_x)
Example #58
0
def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path):
    """Load tf pre-trained weights in a pytorch model (from NumPy arrays here)"""
    import re

    import numpy as np

    if ".ckpt" in openai_checkpoint_folder_path:
        openai_checkpoint_folder_path = os.path.dirname(openai_checkpoint_folder_path)

    logger.info("Loading weights from {}".format(openai_checkpoint_folder_path))

    with open(openai_checkpoint_folder_path + "/parameters_names.json", "r", encoding="utf-8") as names_handle:
        names = json.load(names_handle)
    with open(openai_checkpoint_folder_path + "/params_shapes.json", "r", encoding="utf-8") as shapes_handle:
        shapes = json.load(shapes_handle)
    offsets = np.cumsum([np.prod(shape) for shape in shapes])
    init_params = [np.load(openai_checkpoint_folder_path + "/params_{}.npy".format(n)) for n in range(10)]
    init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
    init_params = [param.reshape(shape) for param, shape in zip(init_params, shapes)]

    # This was used when we had a single embedding matrix for positions and tokens
    # init_params[0] = np.concatenate([init_params[1], init_params[0]], 0)
    # del init_params[1]
    init_params = [arr.squeeze() for arr in init_params]

    try:
        assert model.tokens_embed.weight.shape == init_params[1].shape
        assert model.positions_embed.weight.shape == init_params[0].shape
    except AssertionError as e:
        e.args += (model.tokens_embed.weight.shape, init_params[1].shape)
        e.args += (model.positions_embed.weight.shape, init_params[0].shape)
        raise

    model.tokens_embed.weight.data = torch.from_numpy(init_params[1])
    model.positions_embed.weight.data = torch.from_numpy(init_params[0])
    names.pop(0)
    # Pop position and token embedding arrays
    init_params.pop(0)
    init_params.pop(0)

    for name, array in zip(names, init_params):  # names[1:n_transfer], init_params[1:n_transfer]):
        name = name[6:]  # skip "model/"
        assert name[-2:] == ":0"
        name = name[:-2]
        name = name.split("/")
        pointer = model
        for m_name in name:
            if re.fullmatch(r"[A-Za-z]+\d+", m_name):
                scope_names = re.split(r"(\d+)", m_name)
            else:
                scope_names = [m_name]
            if scope_names[0] == "g":
                pointer = getattr(pointer, "weight")
            elif scope_names[0] == "b":
                pointer = getattr(pointer, "bias")
            elif scope_names[0] == "w":
                pointer = getattr(pointer, "weight")
            else:
                pointer = getattr(pointer, scope_names[0])
            if len(scope_names) >= 2:
                num = int(scope_names[1])
                pointer = pointer[num]
        try:
            assert (
                pointer.shape == array.shape
            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        logger.info("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)
    return model
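# Standalone sketch (toy shapes) of the offsets/np.split pattern used above to carve a
# flat parameter vector back into per-tensor arrays.
import numpy as np

shapes_toy = [(2, 3), (4,), (3, 2)]
flat = np.arange(sum(int(np.prod(s)) for s in shapes_toy), dtype=np.float32)

offsets = np.cumsum([np.prod(s) for s in shapes_toy])   # [6, 10, 16]
params = np.split(flat, offsets)[:-1]                   # drop the trailing empty chunk
params = [p.reshape(s) for p, s in zip(params, shapes_toy)]
print([p.shape for p in params])                        # [(2, 3), (4,), (3, 2)]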
def calc_detection_voc_prec_rec(
        pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
        gt_difficults=None,
        iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of
    predicted bounding boxes obtained from a dataset which has :math:`N`
    images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.

    Args:
        pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
            sets of bounding boxes.
            Its index corresponds to an index for the base dataset.
            Each element of :obj:`pred_bboxes` is a set of coordinates
            of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
            where :math:`R` corresponds
            to the number of bounding boxes, which may vary among boxes.
            The second axis corresponds to
            :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
        pred_labels (iterable of numpy.ndarray): An iterable of labels.
            Similar to :obj:`pred_bboxes`, its index corresponds to an
            index for the base dataset. Its length is :math:`N`.
        pred_scores (iterable of numpy.ndarray): An iterable of confidence
            scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
            its index corresponds to an index for the base dataset.
            Its length is :math:`N`.
        gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
            bounding boxes
            whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
            bounding box whose shape is :math:`(R, 4)`. Note that the number of
            bounding boxes in each image does not need to be same as the number
            of corresponding predicted boxes.
        gt_labels (iterable of numpy.ndarray): An iterable of ground truth
            labels which are organized similarly to :obj:`gt_bboxes`.
        gt_difficults (iterable of numpy.ndarray): An iterable of boolean
            arrays which is organized similarly to :obj:`gt_bboxes`.
            This tells whether the
            corresponding ground truth bounding box is difficult or not.
            By default, this is :obj:`None`. In that case, this function
            considers all bounding boxes to be not difficult.
        iou_thresh (float): A prediction is correct if its Intersection over
            Union with the ground truth is above this value.

    Returns:
        tuple of two lists:
        This function returns two lists: :obj:`prec` and :obj:`rec`.

        * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \
            for class :math:`l`. If class :math:`l` does not exist in \
            either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \
            set to :obj:`None`.
        * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \
            for class :math:`l`. If class :math:`l` that is not marked as \
            difficult does not exist in \
            :obj:`gt_labels`, :obj:`rec[l]` is \
            set to :obj:`None`.

    """

    pred_bboxes = iter(pred_bboxes)
    pred_labels = iter(pred_labels)
    pred_scores = iter(pred_scores)
    gt_bboxes = iter(gt_bboxes)
    gt_labels = iter(gt_labels)
    if gt_difficults is None:
        gt_difficults = itertools.repeat(None)
    else:
        gt_difficults = iter(gt_difficults)

    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)

    for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \
            six.moves.zip(
                pred_bboxes, pred_labels, pred_scores,
                gt_bboxes, gt_labels, gt_difficults):

        if gt_difficult is None:
            gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = bbox_iou(pred_bbox_l, gt_bbox_l)
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    for iter_ in (
            pred_bboxes, pred_labels, pred_scores,
            gt_bboxes, gt_labels, gt_difficults):
        if next(iter_, None) is not None:
            raise ValueError('Length of input iterables need to be same.')

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos.keys():
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]

    return prec, rec
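# Minimal usage sketch (toy boxes, one image) for calc_detection_voc_prec_rec.
# Assumes the surrounding module provides bbox_iou and the other imports the
# function relies on.
import numpy as np

pred_bboxes = [np.array([[0., 0., 10., 10.], [20., 20., 30., 30.]])]
pred_labels = [np.array([0, 0])]
pred_scores = [np.array([0.9, 0.4])]
gt_bboxes = [np.array([[0., 0., 10., 10.]])]
gt_labels = [np.array([0])]

prec, rec = calc_detection_voc_prec_rec(
    pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, iou_thresh=0.5)
print(prec[0], rec[0])   # cumulative precision/recall curve for class 0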
def angOffset_plateScale(dateString,plotTitle,plot=True):

    # read in pickled star positions in (x,y)
    picklefile = "identified_stars_"+dateString+".p"
    fo1=open(picklefile,'rb')
    dat=pickle.load(fo1)
    fo1.close()

    # read in text file of star positions in (RA,DEC)
    df = pd.read_csv('star_ra_dec_list.csv')

    # initialize quantities
    baselineNumber = 0 # for counting number of baselines
    angleRadecMinusXYArray = [] # for collecting angle differences
    plateScaleArray = [] # for collecting plate scale measurements

    # loop over dither positions
    for ditherPos in range(0,len(dat.keys())):
        
        keyName = "dither_pos_"+"%02i"%ditherPos # key for this dither
        parent = dat[keyName][1] # N star names

        # list of all combinations of names (no degeneracies)
        allCombs = list(itertools.combinations(parent, 2))
    
        # loop over all baselines (i.e., take N, pick 2)
        for baseline in range(0,len(allCombs)):
        
        
            ## find position angle and distance in (x,y) space
        
            # retrieve x, y
            star1name = allCombs[baseline][0]
            star2name = allCombs[baseline][1]
        
            names = np.array(dat[keyName][1][:])
            star1elem = int(np.where(names==star1name)[0]) # element number in names array
            star2elem = int(np.where(names==star2name)[0])
        
            star1elem_2 = dat[keyName][0][star1elem]
            star2elem_2 = dat[keyName][0][star2elem]
        
            star1coords_xy = [dat[keyName][2]['[x]'][star1elem_2],dat[keyName][2]['[y]'][star1elem_2]]
            star2coords_xy = [dat[keyName][2]['[x]'][star2elem_2],dat[keyName][2]['[y]'][star2elem_2]]

            # sort according to y-position
            coords_low, coords_high, pos_angle_xy, name_low, name_high = order_Y(
                star1coords_xy, star2coords_xy, star1name, star2name)
        
            # find distance between the two stars
            del_y = np.subtract(coords_high[1],coords_low[1])
            del_x = np.subtract(coords_high[0],coords_low[0])
            dist_xy = np.sqrt(np.power(del_x,2)+np.power(del_y,2))
        
        
            ## find position angle in (RA,DEC) space
            # (note that stars are in same order, which is important if they have nearly equal DEC or y)
        
            # retrieve RA, DEC
            radecStarElem_low = np.where(df[' shorthand']==' '+name_low)[0] # find element number
            radecStarElem_high = np.where(df[' shorthand']==' '+name_high)[0]
            raString_low = df[' RA'][radecStarElem_low].values
            decString_low = df[' DEC'][radecStarElem_low].values
            raString_high = df[' RA'][radecStarElem_high].values
            decString_high = df[' DEC'][radecStarElem_high].values

            # find angle, separation
            c_low = SkyCoord(raString_low+decString_low, unit=(u.hourangle, u.deg))
            c_high = SkyCoord(raString_high+decString_high, unit=(u.hourangle, u.deg))
            pos_angle_radec = c_low.position_angle(c_high).degree[0] # position angle, E of N
            sep_radec = c_low.separation(c_high).arcsec[0] # separation in asec
        
            baselineNumber += 1 # chalk up this baseline to the total
        
            # how much further E of W is the position angle from (RA,DEC) than (x,y)?
            angleDiff_1 = np.subtract(pos_angle_radec,pos_angle_xy)
        
            if (angleDiff_1 > 0): # if x,y angle opens further E of N than the RA,DEC angle
                angleDiff = np.mod(
                    angleDiff_1,
                    360.) # mod is in case one angle is <0 and the other >180
            else: # if difference between x,y angle and RA,DEC angle is negative
                angleDiff = np.copy(angleDiff_1)
            
            angleRadecMinusXYArray = np.append(angleRadecMinusXYArray,angleDiff) # append del_angle to array
            plateScaleArray = np.append(plateScaleArray,1000.*np.divide(sep_radec,dist_xy)) # append plate scale (mas/pix)

            
    ## make CDFs...
    # ...of angular offsets
    angleDiffArraySorted = sorted(angleRadecMinusXYArray)
    angleDiff_csf = np.cumsum(angleDiffArraySorted).astype("float32")
    angleDiff_csf_norm = np.divide(angleDiff_csf,np.max(angleDiff_csf))
    # ...of plate scales
    plateScaleArraySorted = sorted(plateScaleArray)
    plateScale_csf = np.cumsum(plateScaleArraySorted).astype("float32")
    plateScale_csf_norm = np.divide(plateScale_csf,np.max(plateScale_csf))

    ## find median, +- sigma values...
    # ...of angular offsets
    angleDiff_negSigmaPercentile = np.percentile(angleRadecMinusXYArray,15.9)
    angleDiff_50Percentile = np.percentile(angleRadecMinusXYArray,50)
    angleDiff_posSigmaPercentile = np.percentile(angleRadecMinusXYArray,84.1)
    # ...of plate scales
    plateScale_negSigmaPercentile = np.percentile(plateScaleArray,15.9)
    plateScale_50Percentile = np.percentile(plateScaleArray,50)
    plateScale_posSigmaPercentile = np.percentile(plateScaleArray,84.1)

    # prepare strings
    string1 = '{0:.3f}'.format(angleDiff_50Percentile)
    string2 = '{0:.3f}'.format(np.subtract(angleDiff_posSigmaPercentile,angleDiff_50Percentile))
    string3 = '{0:.3f}'.format(np.subtract(angleDiff_50Percentile,angleDiff_negSigmaPercentile))
    string4 = '{0:.3f}'.format(plateScale_50Percentile)
    string5 = '{0:.3f}'.format(np.subtract(plateScale_posSigmaPercentile,plateScale_50Percentile))
    string6 = '{0:.3f}'.format(np.subtract(plateScale_50Percentile,plateScale_negSigmaPercentile))

    # print info
    print('------------------------------')
    print('Number of stellar pair baselines:')
    print(baselineNumber)
    print('------------------------------')
    print('Need to rotate array E of N:\n'+string1+'/+'+string2+'/-'+string3+' deg')
    print('------------------------------')
    print('Plate scale:')
    print(string4+'/+'+string5+'/-'+string6+' mas/pix')

        
    if (plot):
    
        # plot rotation angle
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.axvline(x=angleDiff_negSigmaPercentile,linestyle='--',color='k')
        ax.axvline(x=angleDiff_50Percentile,linestyle='-',color='k')
        ax.axvline(x=angleDiff_posSigmaPercentile,linestyle='--',color='k')
        ax.scatter(angleDiffArraySorted, angleDiff_csf_norm)
        ax.text(0.8, 0.1,s='Need to rotate array E of N:\n'+string1+'/+'+string2+'/-'+string3+' deg\n\nStellar baselines:\n'+str(baselineNumber))
        plt.title('CDF of difference (E of N) between (RA, DEC) and (x, y) position angles on LMIRcam, '+plotTitle)
        plt.xlabel('Degrees E of N')
        plt.ylabel('Normalized CDF')
        plt.show()

        # plot plate scale
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.axvline(x=plateScale_negSigmaPercentile,linestyle='--',color='k')
        ax.axvline(x=plateScale_50Percentile,linestyle='-',color='k')
        ax.axvline(x=plateScale_posSigmaPercentile,linestyle='--',color='k')
        ax.scatter(plateScaleArraySorted, plateScale_csf_norm)
        ax.text(10.4, 0.8,s='Plate scale:\n'+string4+'/+'+string5+'/-'+string6+' mas/pix\n\nStellar baselines:\n'+str(baselineNumber))
        plt.title('Plate scale of LMIRcam, '+plotTitle)
        plt.xlabel('PS (mas/pix)')
        plt.ylabel('Normalized CDF')
        plt.show()