Example #1
def Cov(A, B, n):
    '''
    Compute the trailing n-day covariance of two factors.
    Requires n >= 2.
    '''
    if n < 2:
        # print("n must be at least 2 when computing the n-day covariance of A and B; returning A")
        return A
    result_A = []
    result_B = []
    for i in range(n):
        temp_A = np.roll(A, i, axis=0)
        temp_A[:i] = np.nan
        temp_B = np.roll(B, i, axis=0)
        temp_B[:i] = np.nan
        result_A.append(temp_A)
        result_B.append(temp_B)
    result_A = np.stack(result_A)
    mean = bk.nanmean(result_A, axis=0)
    result_A = result_A - mean
    result_B = np.stack(result_B)
    mean = bk.nanmean(result_B, axis=0)
    result_B = result_B - mean
    result = bk.nanmean(result_A * result_B, axis=0)
    result[np.isnan(A * B)] = np.nan
    return result
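A minimal usage sketch for Cov (not from the original source), assuming np is numpy and bk is bottleneck, the aliases the snippet relies on; inputs are days x assets arrays:

import numpy as np
import bottleneck as bk

# Two factors observed over 5 days for 3 assets.
A = np.arange(15, dtype=float).reshape(5, 3)
B = 2.0 * A + 1.0  # linearly related to A

cov3 = Cov(A, B, 3)   # trailing 3-day covariance per day and asset
print(cov3.shape)     # (5, 3)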
Example #2
 def compute(self, today, assets, out, closes):
     diffs = diff(closes, axis=0)
     ups = nanmean(clip(diffs, 0, inf), axis=0)
     downs = abs(nanmean(clip(diffs, -inf, 0), axis=0))
     return evaluate(
         "100 - (100 / (1 + (ups / downs)))", local_dict={"ups": ups, "downs": downs}, global_dict={}, out=out
     )
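For readers without numexpr, the same RSI formula in plain numpy (a hypothetical standalone helper, not part of the original CustomFactor):

import numpy as np
import bottleneck as bn

def rsi_from_closes(closes):
    # closes: (window, n_assets) array of close prices
    diffs = np.diff(closes, axis=0)
    ups = bn.nanmean(np.clip(diffs, 0, np.inf), axis=0)
    downs = abs(bn.nanmean(np.clip(diffs, -np.inf, 0), axis=0))
    return 100 - (100 / (1 + ups / downs))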
Example #3
def test_memory_leak():
    import resource

    arr = np.arange(1).reshape((1, 1))

    starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    for i in range(1000):
        for axis in [None, 0, 1]:
            bn.nansum(arr, axis=axis)
            bn.nanargmax(arr, axis=axis)
            bn.nanargmin(arr, axis=axis)
            bn.nanmedian(arr, axis=axis)
            bn.nansum(arr, axis=axis)
            bn.nanmean(arr, axis=axis)
            bn.nanmin(arr, axis=axis)
            bn.nanmax(arr, axis=axis)
            bn.nanvar(arr, axis=axis)

    ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    diff = ending - starting
    diff_bytes = diff * resource.getpagesize()
    print(diff_bytes)
    # For 1.3.0 release, this had value of ~100kB
    assert diff_bytes == 0
Example #4
def test_memory_leak() -> None:
    import resource

    arr = np.arange(1).reshape((1, 1))

    n_attempts = 3
    results = []

    for _ in range(n_attempts):
        starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        for _ in range(1000):
            for axis in [None, 0, 1]:
                bn.nansum(arr, axis=axis)
                bn.nanargmax(arr, axis=axis)
                bn.nanargmin(arr, axis=axis)
                bn.nanmedian(arr, axis=axis)
                bn.nansum(arr, axis=axis)
                bn.nanmean(arr, axis=axis)
                bn.nanmin(arr, axis=axis)
                bn.nanmax(arr, axis=axis)
                bn.nanvar(arr, axis=axis)

        ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        diff = ending - starting
        diff_bytes = diff * resource.getpagesize()
        # For 1.3.0 release, this had value of ~100kB
        if diff_bytes:
            results.append(diff_bytes)
        else:
            break

    assert len(results) < n_attempts
Example #5
def pairwise_covariance(x_mat, y=None, correlation=False):
    x_mat = x_mat.copy()
    x_nan = np.isnan(x_mat)
    if y is not None:
        if y.shape[0] != 1:
            assert y.shape == x_mat.shape, 'y and x_mat must have the same shape if y has more than one row'
            y_mat = y
        else:
            y_mat = np.tile(y, (x_mat.shape[0], 1))
        y_nan = np.isnan(y_mat)
        x_mat[y_nan] = np.nan
        y_mat[x_nan] = np.nan
        pw_multiply = np.multiply(
            x_mat - bn.nanmean(x_mat, axis=1).reshape(-1, 1),
            y_mat - bn.nanmean(y_mat, axis=1).reshape(-1, 1))
        cov = bn.nansum(pw_multiply, axis=1) / (
            pw_multiply.shape[1] - np.isnan(pw_multiply).sum(axis=1) - 1)
        if correlation:
            return cov / np.multiply(bn.nanstd(x_mat, axis=1, ddof=1),
                                     bn.nanstd(y_mat, axis=1, ddof=1))
        return cov
    else:
        if correlation:
            return pd.DataFrame(x_mat).T.corr().values
        return pd.DataFrame(x_mat).T.cov().values
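A quick usage sketch for pairwise_covariance, assuming np, pd, and bn are the usual numpy/pandas/bottleneck aliases available at module level:

import numpy as np

x = np.array([[1.0, 2.0, np.nan, 4.0],
              [2.0, 4.0, 6.0, 8.0]])
y = np.array([[1.0, 3.0, 5.0, 7.0]])  # one row: tiled against every row of x

print(pairwise_covariance(x, y))                     # one covariance per row
print(pairwise_covariance(x, y, correlation=True))   # one correlation per row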
Example #6
def is_guide_ota(primhdu, ext, w=20, debug=False):

    logger = logging.getLogger("IsGuideOTA")

    binning = primhdu.header['BINNING']
    skylevel = primhdu.header['SKYLEVEL']
    gain = primhdu.header['GAIN']
    skynoise = primhdu.header['SKYNOISE']

    logger.debug("Checking OTA %s (bin=%d, sky=%.1f, skynoise=%.2f)" % (
        ext.name, binning, skylevel, skynoise))

    if (not is_image_extension(ext)):
        logger.debug("extension is not a valid image extension")
        return False

    excesses = numpy.empty((8,8))
    excesses[:,:] = numpy.NaN

    if (debug):
        center_hdu = [pyfits.PrimaryHDU()]
        corner_hdu = [pyfits.PrimaryHDU()]

    for cx, cy in itertools.product(range(8), repeat=2):

        #
        # Get pixel coord for this cell
        #
        
        x1,x2,y1,y2 = cell2ota__get_target_region(cx, cy, binning=binning, trimcell=0)
        x1,x2,y1,y2 = int(x1),int(x2),int(y1),int(y2)
        x21 = (x2-x1)//2

        # extract the mean value in the bottom corner
        corner = bottleneck.nanmean(ext.data[y1:y1+w, x1:x1+w].astype(numpy.float32))

        # also get the value in the bottom center
        center = bottleneck.nanmean(ext.data[y1:y1+w, x1+x21-w//2:x1+x21+w//2].astype(numpy.float32))

        if (debug):
            print(cx,cy,corner, center)
            corner_hdu.append(pyfits.ImageHDU(data=ext.data[y1:y1+w, x1:x1+w]))
            center_hdu.append(pyfits.ImageHDU(data=ext.data[y1:y1+w, x1+x21-w//2:x1+x21+w//2]))

        excess = corner - center
        #print ext.name, cx, cy, corner, center, excess
            
        excesses[cx,cy] = excess

    _mean = bottleneck.nanmean(excesses)
    _median = bottleneck.nanmedian(excesses)

    is_guideota = (_median > 10*skynoise)
    logger.debug("Found corner excess mean=%.1f, median=%.1f --> guide-OTA: %s" % (
        _mean, _median, "YES" if is_guideota else "NO"))

    if (debug):
        return is_guideota, excesses, _mean, _median, skynoise, corner_hdu, center_hdu

    return is_guideota
Example #7
def shifted_corr(reference, image, displacement):
    """Calculate the correlation between the reference and the image shifted
    by the given displacement.

    Parameters
    ----------
    reference : np.ndarray
    image : np.ndarray
    displacement : np.ndarray

    Returns
    -------
    correlation : float

    """

    ref_cuts = np.maximum(0, displacement)
    ref = reference[ref_cuts[0]:, ref_cuts[1]:, ref_cuts[2]:]
    im_cuts = np.maximum(0, -displacement)
    im = image[im_cuts[0]:, im_cuts[1]:, im_cuts[2]:]
    s = np.minimum(im.shape, ref.shape)
    ref = ref[:s[0], :s[1], :s[2]]
    im = im[:s[0], :s[1], :s[2]]
    ref -= nanmean(ref.reshape(-1, ref.shape[-1]), axis=0)
    ref = np.nan_to_num(ref)
    im -= nanmean(im.reshape(-1, im.shape[-1]), axis=0)
    im = np.nan_to_num(im)
    assert np.all(np.isfinite(ref)) and np.all(np.isfinite(im))
    corr = nanmean(
        [old_div(np.sum(i * r), np.sqrt(np.sum(i * i) * np.sum(r * r))) for
         i, r in zip(np.rollaxis(im, -1), np.rollaxis(ref, -1))])
    return corr
Example #8
def reactivation(date, cs):
    """
    Check if a date has good enough data to trust reactivations.

    Parameters
    ----------
    date
    cs

    Returns
    -------

    """

    # ncells, ntimes, nonsets
    trs = stimulus.trials(date,
                          cs,
                          start_s=0,
                          end_s=None,
                          trace_type='dff',
                          baseline=(-1, 0))

    trs = nanmean(trs, axis=2)
    trs = nanmean(trs, axis=1)

    if not len(trs):
        return False

    # Across ncells
    dff_active = np.sum(trs > 0.025) / float(len(trs))

    return dff_active > 0.05
Example #10
    def __init__(self, crs='4326', region=(92.3032344909, 9.93295990645, 101.180005324, 28.335945136),
                 resolution=500, noData=0, country=None):

        if country:
            if crs != '4326':
                raise ValueError('User-defined crs must be EPSG:4326 when using predefined country bounding boxes')
            else:
                self.west, self.south, self.east, self.north = countries.bounding_boxes[country][1]
        else:
            #! Need to add in error handler for correct bounding box format !#
            self.west,self.south,self.east,self.north = region

        if '4326' in crs:
            midPoint = [bn.nanmean([self.north,self.south]),
                        bn.nanmean([self.east,self.west])]
            spacing = utils.meters2dd(midPoint,resolution)

        elif type(resolution) == list:
            spacing = resolution[0]

        else:
            spacing = resolution,resolution

        self.lons = np.arange(self.west,self.east,spacing)
        self.lats = np.arange(self.south,self.north,spacing)

        self.xx,self.yy = np.meshgrid(self.lons,self.lats)

        self.nominalResolution = (spacing)
        self.dims = self.xx.shape

        return
Example #11
    def fit(self, X, y):
        X_y = self._check_params(X, y)
        self.X = X_y[0]
        self.y = X_y[1].reshape((-1, 1))
        n, p = X.shape

        S = []    # list of selected features
        F = range(p)    # list of unselected features

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # Find the first feature
        k_min = 3
        range_k = 7
        xy_MI = np.empty((range_k, p))
        for i in range(range_k):
            xy_MI[i, :] = self._get_first_mi_vector(i + k_min)
        xy_MI = bn.nanmedian(xy_MI, axis=0)

        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
        S_mi.append(bn.nanmax(xy_MI))

        if self.verbose > 0:
            self._info_print(S, S_mi)

        # Find the next features
        if self.n_features == 'auto':
            n_features = np.inf
        else:
            n_features = self.n_features

        while len(S) < n_features:
            s = len(S) - 1
            feature_mi_matrix[s, F] = self._get_mi_vector(F, S[-1])
            fmm = feature_mi_matrix[:len(S), F]
            if bn.allnan(bn.nanmean(fmm, axis=0)):
                break
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            if np.isnan(MRMR).all():
                break
            selected = F[bn.nanargmax(MRMR)]
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)
            if self.verbose > 0:
                self._info_print(S, S_mi)
            if self.n_features == 'auto' and len(S) > 10:
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break
        self.n_features_ = len(S)
        self.ranking_ = S
        self.mi_ = S_mi

        return self
Example #12
def _get_fluctuations(image_array, correction='median'):

    # Calculate the mean PV for each image
    if correction == 'mean':
        mean_values = bn.nanmean(bn.nanmean(image_array, axis=1), axis=1)
    elif correction == 'median':
        mean_values = bn.nanmedian(bn.nanmedian(image_array, axis=1), axis=1)

    return mean_values
Example #13
def least_square_method(dspt):
    npol = 6
    com = np.array([bn.nanmean(dspt.lon), bn.nanmean(dspt.lat)])
    timeseries = False
    ncc = dspt.lon.size
    dlon = []
    dlat = []
    for i in range(ncc):
        # haversine(p1,p2)
        dlon.append(
            haversine([dspt.lon[i], com[1]], com) * 1000 *
            np.sign(dspt.lon[i] - com[0]))
        dlat.append(
            haversine([com[0], dspt.lat[i]], com) * 1000 *
            np.sign(dspt.lat[i] - com[1]))

    dlon = np.array(dlon)
    dlat = np.array(dlat)
    if not timeseries:
        R = np.mat(np.vstack((np.ones((ncc)), dlon, dlat)).T)
        u0 = np.mat(dspt.u.values).T
        v0 = np.mat(dspt.v.values).T

        if (np.isnan(u0).sum() == 0) & (np.isnan(v0).sum() == 0) & (np.isnan(R).sum() == 0):
            A, _, _, _ = la.lstsq(R, u0)
            B, _, _, _ = la.lstsq(R, v0)
        else:
            A = np.nan * np.ones(ncc)
            B = np.nan * np.ones(ncc)

    points = np.vstack([dlon, dlat])
    if (np.isfinite(dlon).sum() == npol) and (np.isfinite(dlat).sum() == npol):
        # careful with nans
        cov = np.cov(points)
        w, v = np.linalg.eig(cov)
        aspect = bn.nanmin(w) / bn.nanmax(w)

        if aspect < 0.99:
            ind = bn.nanargmax(w)
            angle = np.arctan(v[ind, 1] / v[ind, 0]) * 180 / np.pi
            if (angle < 0):
                angle += 360.
        else:
            angle = np.nan
    else:
        aspect = np.nan
        angle = np.nan

    dspt['ux'] = float(A[1])
    dspt['uy'] = float(A[2])
    dspt['vx'] = float(B[1])
    dspt['vy'] = float(B[2])
    dspt['aspect'] = aspect
    dspt['angle'] = angle

    return dspt
Example #14
 def compute(self, today, assets, out, closes):
     diffs = diff(closes, axis=0)
     ups = nanmean(clip(diffs, 0, inf), axis=0)
     downs = abs(nanmean(clip(diffs, -inf, 0), axis=0))
     return evaluate(
         "100 - (100 / (1 + (ups / downs)))",
         local_dict={'ups': ups, 'downs': downs},
         global_dict={},
         out=out,
     )
Example #15
def _nanmean(array, axis=None):
    """Bottleneck nanmean function that handle tuple axis."""

    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0

    if isinstance(array, Quantity):
        return array.__array_wrap__(bottleneck.nanmean(array, axis=axis))
    else:
        return bottleneck.nanmean(array, axis=axis)
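_move_tuple_axes_first is not shown in these snippets; a plausible sketch (an assumption inferred from how it is called here) moves the reduced axes to the front and merges them into one leading axis:

import numpy as np

def _move_tuple_axes_first(array, axis):
    # Put the axes being reduced first, then collapse them into a
    # single leading axis so the caller can reduce with axis=0.
    other_axes = tuple(i for i in range(array.ndim) if i not in axis)
    array = np.transpose(array, axis + other_axes)
    return array.reshape((-1,) + array.shape[len(axis):])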
Example #17
def sky_subtract(V, Iin, err=None, window=400, skywindow=400, threshold=5.0, niter=20, ax=None, center=0):
    """
    Takes a line (probably produced by plot_line) and does a rough sky
    subtraction. The sky is estimated from the region of the spectrum that is
    window/2 away from the line center and skywindow wide. This is an
    iterative process that uses ADE_moments to compute the line center. When
    the new line center (after subtraction) is less than threshold different
    from the old line center the sky subtraction is considered complete.
    """
    I = np.copy(Iin)
    skylevel = 0.0
    if ax is not None:
        ax.plot(V, I)
    lowV = -1 * window / 2.0 + center
    highV = window / 2.0 + center
    idx = np.where((V >= lowV) & (V <= highV))
    skidx = np.where((V < lowV) & (V >= lowV - skywindow) | (V > highV) & (V <= highV + skywindow))
    moments = ADE.ADE_moments(V[idx], I[idx])
    oldcent = moments[0]
    skyfit = bn.nanmean(I[skidx])
    ax.axhline(y=skyfit)
    I -= skyfit
    skylevel += skyfit
    newcent = ADE.ADE_moments(V[idx], I[idx])[0]
    print "old: {}, new: {}".format(oldcent, newcent)
    n = 0
    while np.abs(newcent - oldcent) > threshold and n <= niter:
        oldcent = newcent
        lowV = oldcent - window / 2.0
        highV = oldcent + window / 2.0
        idx = np.where((V >= lowV) & (V <= highV))
        skidx = np.where((V < lowV) & (V >= lowV - skywindow) | (V > highV) & (V <= highV + skywindow))
        if idx[0].size == 0:
            idx = ([0, 1],)
        if skidx[0].size == 0:
            skyfit = 0
            skidx = (np.array([0, -1]),)
        else:
            skyfit = bn.nanmean(I[skidx])
        I -= skyfit
        skylevel += skyfit
        newcent = ADE.ADE_moments(V[idx], I[idx])[0]
        print "{}: old: {}, new: {}".format(n, oldcent, newcent)
        n += 1

    if ax is not None:
        ax.errorbar(V, I, yerr=err)
        ax.axvline(x=newcent, ls=":")
        ax.axhline(y=skyfit)
        ax.axvspan(V[idx[0][0]], V[idx[0][-1]], color="r", alpha=0.3)
        ax.axvspan(V[skidx[0][0]], V[idx[0][0]], color="g", alpha=0.3)
        ax.axvspan(V[idx[0][-1]], V[skidx[0][-1]], color="g", alpha=0.3)

    return V, I, err, newcent, skylevel
Example #18
 def process(self, window, output_length):
     data = np.array_split(window, output_length)
     result = []
     for section in data:
         if math.isnan(bn.nanmean(section)):
             result.append(0)
         else:
             result.append(bn.nanmean(section))
     result = np.array(result)
     print "paa output shape", result.shape
     return result
Example #19
def smooth(x, window):
    """Calculate moving average of input with given window (number of points)"""
    window = int(window)
    if window <= 1: return x
    if window % 2 == 0: window += 1
    if window >= len(x): return zeros_like(x) + nanmean(x)
    y = move_mean(x, window, min_count=1)
    yny = append(y[window // 2:], [NaN] * (window // 2))
    for k in range(window // 2):
        yny[k] = nanmean(x[:(2 * k + 1)])
        yny[-(k + 1)] = nanmean(x[-(2 * k + 1):])
    return yny
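A quick check of smooth, assuming the bare names come from numpy (zeros_like, append, NaN) and bottleneck (move_mean, nanmean):

import numpy as np
from numpy import zeros_like, append, NaN
from bottleneck import move_mean, nanmean

x = np.arange(10, dtype=float)
print(smooth(x, 3))  # centred 3-point moving average; edge windows shrink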
Example #20
 def calc_table_np(self, array):
     if len(array) == 0:
         return array
     if self.out_choiced == 0:  #snr
         return self.make_table(
             (bottleneck.nanmean(array, axis=0) /
              bottleneck.nanstd(array, axis=0)).reshape(1, -1), self.data)
     elif self.out_choiced == 1:  #avg
         return self.make_table(
             bottleneck.nanmean(array, axis=0).reshape(1, -1), self.data)
     else:  # std
         return self.make_table(
             bottleneck.nanstd(array, axis=0).reshape(1, -1), self.data)
Example #21
    def process(self, data, output_length):
        data = np.array(data)
        data = np.array_split(data, int(output_length))
        result = []
        for section in data:
            if math.isnan(bn.nanmean(section)):
                result.append(0)
            else:
                result.append(bn.nanmean(section))
        result = np.array(result)
        result = pd.DataFrame((result))

        print "paa output shape", result.shape
        return result
Example #22
def visually_inhib(date, cs, integrate_bins=6, ncses=3):
    """
    Calculate the probability of being visually driven for each cell.

    Parameters
    ----------
    date : Date
    cs : str
    integrate_bins : int
        Number of bins over which to integrate the visual stim.
    ncses : int
        Number of possible cses, used to correct for multiple comparisons.
    nolick : bool
        If True, exclude trials where there was licking during the stimulus
        presentation.

    Result
    ------
    np.ndarray
        An array of length equal to the number cells, values are the log
        inverse p-value of that cell responding to the particular cs.

    """

    # Baseline is mean across frames, now ncells x nonsets
    baselines = nanmean(stimulus.trials(date, cs, start_s=-1, end_s=0) * -1,
                        axis=1)
    stimuli = stimulus.trials(date, cs, start_s=0) * -1
    fintegrate = -(-np.shape(stimuli)[1] // integrate_bins)  # ceiling division

    # Per-cell value
    meanbl = nanmean(baselines, axis=1)
    ncells = np.shape(baselines)[0]

    # We will save the maximum inverse p values
    maxinvps = np.zeros(ncells, dtype=np.float64)
    bonferroni_n = ncells * ncses * integrate_bins

    for i in range(integrate_bins):
        trs = nanmean(stimuli[:, i * fintegrate:(i + 1) * fintegrate, :],
                      axis=1)

        for c in range(ncells):
            if nanmean(trs[c, :]) > meanbl[c]:
                pv = stats.ranksums(baselines[c, :], trs[c, :]).pvalue
                logpv = -1 * np.log(pv / bonferroni_n)
                if logpv > maxinvps[c]:
                    maxinvps[c] = logpv

    return maxinvps
Example #23
def shifted_corr(reference, image, displacement):
    ref_cuts = np.maximum(0, displacement)
    ref = reference[ref_cuts[0]:, ref_cuts[1]:, ref_cuts[2]:]
    im_cuts = np.maximum(0, -displacement)
    im = image[im_cuts[0]:, im_cuts[1]:, im_cuts[2]:]
    s = np.minimum(im.shape, ref.shape)
    ref = ref[:s[0], :s[1], :s[2]]
    im = im[:s[0], :s[1], :s[2]]
    ref -= nanmean(ref.reshape(-1, ref.shape[-1]), axis=0)
    ref = np.nan_to_num(ref)
    im -= nanmean(im.reshape(-1, im.shape[-1]), axis=0)
    im = np.nan_to_num(im)
    return np.mean([np.sum(i * r) / np.sqrt(np.sum(i * i) * np.sum(r * r))
                    for i, r in zip(np.rollaxis(im, 1), np.rollaxis(ref, 1))])
Example #24
    def process(self, data, output_length):
        data = np.array_split(data, output_length)
        result = []
        for section in data:
            if math.isnan(bn.nanmean(section)):
                result.append(0)
            else:
                result.append(bn.nanmean(section))
        result = np.array(result)
        print "paa output shape", result.shape
        return result

    #def getConfigurationParams(self):
        #return {"output_length":"100"}
Example #25
def nanmean(array, axis=None):
    """
    A nanmean function that uses bottleneck if available.
    """
    if HAS_BOTTLENECK:
        if isinstance(axis, tuple):
            array = move_tuple_axes_first(array, axis=axis)
            axis = 0

        if isinstance(array, u.Quantity):
            return array.__array_wrap__(bn.nanmean(array, axis=axis))
        else:
            return bn.nanmean(array, axis=axis)
    else:
        return np.nanmean(array, axis=axis)
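A usage sketch for the dispatching wrapper above, assuming HAS_BOTTLENECK, move_tuple_axes_first, u (astropy.units), bn, and np are defined at module level as the snippet implies:

import numpy as np

a = np.array([[1.0, np.nan],
              [3.0, 4.0]])
print(nanmean(a))               # scalar mean ignoring the NaN
print(nanmean(a, axis=(0, 1)))  # tuple axis goes through the reshuffle path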
Example #26
def nw_aggregation(nw_paths, genes, file_key='nw'):
    """Function for aggregating co-expression networks
    
    Takes a list of paths to HDF5 files and reads in networks,
    avearges them and then re-ranks.

    Each HDF5 needs to be in the Pytable in the fixed format with
    the network stored under the key listed in the keyword argument file_key
    
    Arguments:
        nw_paths {list} -- list of strings or paths to HDF5 files
        genes {np.array} -- numpy array of genes for network
    
    Keyword Arguments:
        file_key {str} --  key in HDF5 network is stored under (default: {'nw'})
    
    Returns:
        pd.DataFrame -- Aggregate Network
    """

    agg_nw = np.zeros([genes.shape[0], genes.shape[0]])
    for nw_path in nw_paths:
        nw = pd.read_hdf(nw_path,file_key)
        fill = bottleneck.nanmean(nw.values,axis=None)
        agg_nw +=nw.loc[genes,genes].fillna(fill).values
        del nw
        gc.collect()

    return pd.DataFrame(rank(agg_nw),index=genes, columns=genes)
Example #27
def factor_normalize(factor):
    x_m = factor.values
    mean = bn.nanmean(x_m, axis=1).reshape(-1, 1)
    std = bn.nanstd(x_m, axis=1, ddof=1).reshape(-1, 1)
    with np.errstate(invalid='ignore'):
        res = (x_m - mean) / std
    return pd.DataFrame(res, factor.index, factor.columns)
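A tiny demonstration of factor_normalize, assuming the pd/np/bn aliases; each row is z-scored with the sample (ddof=1) standard deviation:

import numpy as np
import pandas as pd

factor = pd.DataFrame([[1.0, 2.0, 3.0],
                       [10.0, np.nan, 30.0]],
                      index=['d1', 'd2'], columns=['a', 'b', 'c'])
print(factor_normalize(factor))  # row means ~0, NaNs preserved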
Example #28
def demean(arr, axis=None):
    """
    Subtract the mean along the specified axis.
    
    Parameters
    ----------
    arr : ndarray
        Input array.
    axis : {int, None}, optional
        The axis along which to remove the mean. The default (None) is
        to subtract the mean of the flattened array.

    Returns
    -------
    y : ndarray
        A copy with the mean along the specified axis removed.

    Examples
    --------
    >>> arr = np.array([1, np.nan, 2, 3])
    >>> demean(arr)
    array([ -1.,  NaN,   0.,   1.])
 
    """
    marr = bn.nanmean(arr, axis)
    if (axis != 0) and (axis is not None) and (not np.isscalar(marr)):
        ind = [slice(None)] * arr.ndim
        ind[axis] = np.newaxis
        marr = marr[tuple(ind)]
    return arr - marr
Example #29
def mean_age_of_wave(x):
    age = []
    for i in range(len(Speed_Dating_df['age'])):
        if Speed_Dating_df['wave'][i] == x:
            age.append(Speed_Dating_df['age'][i])
    mean_age = round(bn.nanmean(age), 2)
    return mean_age
Example #30
def calc_we(cdata, basedir, log_target_rate):
    # WE parameters
    we_dir = os.path.join(basedir, cdata['name'], 'analysis')
    we_dt = cdata['tau']
    we_nbins = cdata['nbins']
    we_target_count = cdata['target_count']

    winsize_flux = cdata['analysis']['winsize_flux']
    winsize_err = cdata['analysis']['winsize_err']
    last_n = cdata['analysis']['last_n']

    we_nframes = 0

    we_data_files = glob(os.path.join(we_dir, '*/rate.h5'))

    for fname in we_data_files:
        f = h5py.File(fname, 'r')
        we_nframes = max(we_nframes, f.attrs['last_completed_iter'] - 2)
        f.close()

    we_err = np.empty((len(we_data_files), 2, we_nframes))
    we_err.fill(np.nan)

    for k, fname in enumerate(we_data_files):
        print('we: {}'.format(fname))
        f = h5py.File(fname, 'r')
        s = f['data'][:]
        dget = min(we_nframes, s.shape[0])
        s = s[:dget, :]

        ss = np.empty_like(s)
        for i in range(4):
            ss[:, i] = smooth(s[:, i], winsize_flux, 'flat')

        sm = s[-last_n:, :].sum(0)

        rAB = (1.0 * ss[:, 1]) / (we_dt * ss[:, 2])
        rBA = (1.0 * ss[:, 0]) / (we_dt * ss[:, 3])

        rABm = (1.0 * sm[1]) / (we_dt * sm[2])
        rBAm = (1.0 * sm[0]) / (we_dt * sm[3])

        print('we_{} -- kAB: {}, kBA: {}'.format(k, rABm, rBAm))

        we_err[k, 0, :rAB.shape[0]] = logfunc(rAB) - log_target_rate[0]
        we_err[k, 1, :rBA.shape[0]] = logfunc(rBA) - log_target_rate[1]

        f.close()

    we_err = np.abs(we_err)

    #we_err_avg = np.sqrt(np.mean(we_err**2,0))
    we_err_avg = np.sqrt(bn.nanmean(we_err**2, 0))

    for i in range(2):
        we_err_avg[i, :] = smooth(we_err_avg[i, :], winsize_err, 'flat')

    we_t = we_dt * we_nbins * we_target_count * np.arange(we_nframes)

    return we_err_avg, we_t
Example #32
def _nanmean(array, axis=None):
    """Bottleneck nanmean function that handle tuple axis."""

    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0
    return bottleneck.nanmean(array, axis=axis)
Example #33
 def time_step(self, xt):
     xt = np.reshape(xt, newshape=self.dimensions)
     ret_val = 0.
     self.buffer.append(xt)
     self.present.time_step(xt)
     if self.t >= self.buffer_len:
         pst_xt = self.buffer[0]
         self.past.time_step(pst_xt)
         if self.t >= self.present.theta + self.past.theta:
             ret_val = self.comparison_function(self.present, self.past,
                                                self.present.alpha)
     self.ma_window.append(ret_val)
     if self.t % self.ma_recalc_delay == 0:
         self.anomaly_mean = bn.nanmean(self.ma_window)
         self.anomaly_std = bn.nanstd(self.ma_window, ddof=self.ddof)
     if self.anomaly_std is None or self.t < len(self.ma_window):
         anomaly_density = 0
     else:
         normalized_score = (ret_val - self.anomaly_mean)/self.anomaly_std
         if -4 <= normalized_score <= 4:
             anomaly_density = CDF_TABLE[round(normalized_score, 3)]
         elif normalized_score > 4:
             anomaly_density = 1.
         else:
             anomaly_density = 0.
     self.t += 1
     return ret_val, anomaly_density
Example #34
def get_lugsail_batch_means_est(data_in, steps=None):
    m = len(data_in)
    T_iL = []
    s_i = []
    n_i = []

    for data_chain, burnin_chain in data_in:
        data = data_chain[burnin_chain:steps]
        if data.size < 2:
            return np.inf
        # [chapter 2.2 in Vats and Knudson, 2018]
        n_ii = data.size
        b = int(n_ii**(1 / 2))  # Batch size. Alternative: n ** (1/3)
        n_i.append(n_ii)

        chain_mean = bn.nanmean(data)
        T_iL.append(
            2 * get_tau_lugsail(b, data, chain_mean) \
            - get_tau_lugsail(b // 3, data, chain_mean)
        )
        s_i.append(bn.nanvar(data, ddof=1))

    T_L = np.mean(T_iL)
    s = np.mean(s_i)
    n = np.round(np.mean(n_i))

    sigma_L = ((n - 1) * s + T_L) / n

    # [eq. 5 in Vats and Knudson, 2018]
    R_L = np.sqrt(sigma_L / s)

    return R_L
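get_tau_lugsail is not shown here; a standard batch-means sketch (an assumption, following the batch-means construction in Vats and Knudson, 2018) estimates it as b times the sample variance of the non-overlapping batch means:

import numpy as np
import bottleneck as bn

def get_tau_lugsail(b, data, chain_mean):
    # a = number of complete batches of size b; tau = b * Var(batch means).
    # Assumes at least two complete batches (a >= 2).
    a = data.size // b
    batch_means = np.array([bn.nanmean(data[i * b:(i + 1) * b])
                            for i in range(a)])
    return b * np.sum((batch_means - chain_mean) ** 2) / (a - 1)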
Example #35
def bootstrap(func, arglist, N, kwargs={}):
    '''Computes error via bootstrapping on an arbitrary function. The
    major restriction is that func is assumed to return a single, 1D,
    Numpy array. Bootstrap will also resample ALL of the elements of
    arglist. If you want to keep some inputs unchanged pass them as
    keywords. The func can have an arbitrary number of arguments and
    keyword arguments. If the output of func is a Ndarray of length N
    then bootstrap returns two arrays of length N. The first is the
    mean value over all bootstraps and the second is the stddev of the
    same.
    '''
    
    if type(arglist) != list:
        arglist = [arglist]
    size = len(arglist[0])
    resultarr = None
    for i in range(N):

        idx = np.random.randint(0,size,size)
        bootargs = [i[idx] for i in arglist]
        result = func(*bootargs,**kwargs)
        try:
            resultarr = np.vstack((resultarr,result))
        except ValueError:
            resultarr = result

    print(np.isnan(resultarr).sum())
    return bn.nanmean(resultarr, axis=0), bn.nanstd(resultarr, axis=0)
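Usage sketch for bootstrap, assuming the np/bn aliases; the resampled statistic here is a per-column mean:

import numpy as np
import bottleneck as bn

data = np.random.randn(200, 3)
mean, err = bootstrap(lambda d: bn.nanmean(d, axis=0), [data], N=500)
print(mean, err)  # bootstrap mean and stddev of the three column means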
Example #36
 def _calc_parameters(self, wfm_counts):
     # loop over the waveforms
     for i in np.arange(self._n): 
         try:
             y = wfm_counts[i, :].flatten().astype(np.float32)
             y -= bn.nanmean(y[0:11])  # Remove Noise
             y[np.where(y < 0.0)[0]] = 0.0  # Set negative counts to zero
             yp = np.nanmax(y)  # Waveform peak value
             
             if np.isnan(yp):  # if the current wf is nan
                 # no ltpp can be computed
                 self._ltpp[i] = np.nan
             else:
                 ypi = np.nanargmax(y)  # Waveform peak index
                 
                 # gates to compute the late tail:
                 # [ypi+50:ypi+70] if 0padding=2, [ypi+25:ypi+35] if 0padding=1
                 gate_start = ypi + self._pad*25
                 gate_stop = ypi + self._pad*35 + 1
                 
                 if gate_start > self._n_range_bins or gate_stop > self._n_range_bins:
                     # not enough gates to compute the LTPP
                     self._ltpp[i] = np.nan
                 else:
                     self._ltpp[i] = np.mean(y[gate_start:gate_stop])/yp
                     
         except ValueError:
             self._ltpp[i] = np.nan
Example #37
 def _align_planes(shifts):
     """Align planes to minimize shifts between them."""
     mean_shift = nanmean(np.concatenate(shifts), axis=0)
     # calculate alteration of shape (num_planes, dim)
     alteration = (mean_shift - mean_shift[0]).astype(int)
     for seq in shifts:
         seq -= alteration
Example #38
def create_nw(data, replace_nans):
    nw = np.corrcoef(data)
    np.fill_diagonal(nw, 1)
    nw = rank(nw)
    if replace_nans:
        nw[np.isnan(nw)] = bottleneck.nanmean(nw)
    return nw
Example #42
 def _calc_parameters(self, wfm_counts):
     for i in np.arange(self._n):
         try:
             y = wfm_counts[i, :].flatten().astype(np.float32)
             y -= bn.nanmean(y[0:11])  # Remove Noise
             y[np.where(y < 0.0)[0]] = 0.0  # Set negative counts to zero
             yp = np.nanmax(y)  # Waveform peak value
             ypi = np.nanargmax(y)  # Waveform peak index
             if 3*self._pad < ypi < self._n_range_bins-4*self._pad:
                 self._peakiness_l[i] = yp/bn.nanmean(y[ypi-3*self._pad:ypi-1*self._pad+1])*3.0
                 self._peakiness_r[i] = yp/bn.nanmean(y[ypi+1*self._pad:ypi+3*self._pad+1])*3.0
                 self._peakiness[i] = yp/y.sum()*self._n_range_bins
         except ValueError:
             self._peakiness_l[i] = np.nan
             self._peakiness_r[i] = np.nan
             self._peakiness[i] = np.nan
Example #43
def singleVar(content):
    return dict(min=nanmin(content),
                max=nanmax(content),
                mean=nanmean(content),
                median=nanmedian(content),
                valid=numpy.sum(numpy.isfinite(content)) * 100.0 /
                content.size)
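singleVar assumes bottleneck's nanmin/nanmax/nanmean/nanmedian are imported as bare names; a quick call under that assumption:

import numpy
from bottleneck import nanmin, nanmax, nanmean, nanmedian

content = numpy.array([1.0, 2.0, numpy.nan, 4.0])
print(singleVar(content))  # min=1.0, max=4.0, mean/median over finite values, valid=75.0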
Example #44
 def _norm_data(self, X):
     m = bn.nanmean(X, 0)
     inds = np.where(np.isnan(X))
     X[inds] = np.take(m, inds[1])
     np.subtract(X, m, out=X)
     v = X.var(0)
     np.divide(X, np.sqrt(v), out=X)
     return X
Example #45
 def get_allele_frequency(self,bed,args):
     s = args.SNPs_to_read
     af = np.zeros((bed.sid_count))
     var = np.zeros((bed.sid_count))
     if (args.from_bp is not None) and (args.to_bp is not None):
         k0 = np.where((bed.pos[:,2]>=args.from_bp))[0][0]
         k1 = np.where((bed.pos[:,2]<=args.to_bp))[0][-1]
         X = bed[:,k0:k1].read().val
         af[k0:k1] = bn.nanmean(X,0)/2.0
         var[k0:k1] = self._fast_var(X,2*af[k0:k1])
     else:
         for i in range(int(np.ceil(bed.sid_count/s))):
             X = bed[:,i*s:(i+1)*s].read().val
             af[i*s:(i+1)*s] = bn.nanmean(X,0)/2.0
             var[i*s:(i+1)*s] = self._fast_var(X,2*af[i*s:(i+1)*s])
     af[var==0]=0
     return af
Example #46
0
def _phase3(self):
	"""
	Normal phase 3, but with tracking the boost changes. Double commented lines
	are new.
	"""
	
	# Update permanences
	self.p = np.clip(self.p + (self.c_pupdate * self.y[:, 0:1] *
		self.x[self.syn_map] - self.pdec * self.y[:, 0:1]), 0, 1)
	
	if self.disable_boost is False:
		# Update the boosting mechanisms
		if self.global_inhibition:
			min_dc = np.zeros(self.ncolumns)
			min_dc.fill(self.c_mdc * bn.nanmax(self.active_dc))
		else:
			min_dc = self.c_mdc * bn.nanmax(self.neighbors * self.active_dc, 1)
		
		## Save pre-overlap boost info
		boost = list(self.boost)
		
		# Update boost
		self._update_active_duty_cycle()
		self._update_boost(min_dc)
		self._update_overlap_duty_cycle()
	
		## Write out overlap boost changes
		with open(os.path.join(self.out_path, 'overlap_boost.csv'), 'ab') as f:
			writer = csv.writer(f)
			writer.writerow([self.iter, bn.nanmean(boost != self.boost)])
	
		# Boost permanences
		mask = self.overlap_dc < min_dc
		mask.resize(self.ncolumns, 1)
		self.p = np.clip(self.p + self.c_sboost * mask, 0, 1)
	
		## Write out permanence boost info
		with open(os.path.join(self.out_path, 'permanence_boost.csv'), 'ab') \
			as f:
			writer = csv.writer(f)
			writer.writerow([self.iter, bn.nanmean(mask)])
	
	# Trim synapses
	if self.trim is not False:
		self.p[self.p < self.trim] = 0
Example #47
def crmse(homog, orig, centered=True, crop=None):
    """Calculate the Centred Root-Mean-Square Error (CRMSE) between any pair of
    homogenised and original data sets.

    Parameters
    ----------
    homog : array_like
        Homogenised station data.
    orig : array_like
        Original station data.
    centered : boolean, default True
        Return RMSE or the centred RMSE.
    crop : int, optional
        Do not consider the first and the last `crop` years.

    Returns
    -------
    ndarray
        CRMSE.

    Notes
    -----
    RMSE is commonly used in meteorology to see how effectively a mathematical
    model predicts the behaviour of the atmosphere.

    The RMSE of an estimator :math:`{\hat{\\theta}}` with respect to an
    estimated parameter :math:`{\\theta}` is defined as the square root of the
    mean square error:

    .. math:: \operatorname{RMSE}(\hat{\\theta}) = \sqrt{\operatorname{MSE}
        (\hat{\\theta})} = \sqrt{\operatorname{E}((\hat{\\theta}-\\theta)^2)}.

    """
    if crop is not None:
        homog = homog[crop:-crop]
        orig = orig[crop:-crop]
        
    # access .values to support both dataframe's (monthly) and series (yearly)
    if centered:
        homog -= bn.nanmean(homog.values)
        orig -= bn.nanmean(orig.values)

    diff = np.sqrt(bn.nanmean(np.power((homog - orig).values, 2)))

    return diff
Example #48
def replace_column_nans_by_mean(matrix):
    # Set the value of gaps/dashes in each column to be the average of the other values in the column.
    nan_indices = np.where(np.isnan(matrix))
    # Note: bn.nanmean() instead of np.nanmean() because it is a lot(!) faster.
    column_nanmeans = bn.nanmean(matrix, axis=0)

    # For each column, assign the NaNs in that column the column's mean.
    # See http://stackoverflow.com/a/18689440 for an explanation of the following line.
    matrix[nan_indices] = np.take(column_nanmeans, nan_indices[1])
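The imputation above happens in place; a small check, assuming the np/bn aliases:

import numpy as np

matrix = np.array([[1.0, np.nan],
                   [3.0, 4.0]])
replace_column_nans_by_mean(matrix)
print(matrix)  # the NaN in column 1 is replaced by 4.0, that column's nanmean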
Example #49
 def paa(self, data_slice):
     if self.word_length > len(data_slice):
         data = data_slice.values.tolist()
         for i in range(len(data), self.word_length):
             data.append(bn.nanmean(data[:i-1]))
             if(math.isnan(data[i])):
                 data[i] = 0
         return np.array(data)
     if self.word_length == len(data_slice):
         return data_slice
     data = np.array_split(data_slice, self.word_length)
     result = []
     for section in data:
         if math.isnan(bn.nanmean(section)):
             result.append(0)
         else:
             result.append(bn.nanmean(section))
     result = np.array(result)
     return result
Example #50
 def mean(self, axis=None):
     # TODO: support multiple axes at the same time, via recursion
     if axis is None:
         return bn.nanmean(self.data)
     # TODO: support specifying axes as indexed number instead of
     #       name
     if axis not in list(self.coordinates.keys()):
         raise ValueError("You asked me to calculate the mean along axis "
                          "%s, but I don't know anything about this "
                          "coordinate dimension", axis)
     # TODO: replace OrderedDict with CoordinateSet
     newcoords = OrderedDict()
     # TODO: add keys() as property to CoordinateSet
     for dim in list(self.coordinates.keys()):
         if dim != axis:
             newcoords[dim] = self.coordinates[dim]
     newdata = bn.nanmean(self.data,
                          axis=list(self.coordinates.keys()).index(axis))
     return gridded_array(newdata, newcoords, self.title)
Example #51
 def process(self, window, output_length):
     data = np.array_split(window, output_length)
     result = []
     for segment in data:
         mean = bn.nanmean(segment)
         if math.isnan(mean):
             result.append(0)
         else:
             result.append(mean)
     result = np.array(result)
     return result
Example #52
def get_median_level(data, radii, ri, ro):

    selected = (radii > ri) & (radii < ro) #& (numpy.isfinite(data))
    pixelcount = numpy.sum(selected)
    if (pixelcount > 0):
        #cutout = data[selected]
        #median = numpy.median(cutout[0:5001])
        cutout = numpy.array(data[selected], dtype=numpy.float32)
        median = bottleneck.nanmean(cutout)
    else:
        median = numpy.NaN

    return median, pixelcount
Example #53
def make_ratios():

    '''read in the data'''
    raw_data = pyfits.open('../reduced/tiESO_z0_MgI.fits')[0].data
    old_data = pyfits.open('to45.fits')[0].data
    MB_data = pyfits.open('tn45.fits')[0].data

    # raw_data = pyfits.open('tsky_sub.fits')[0].data
    # old_data = pyfits.open('tosky_sub.fits')[0].data
    # MB_data = pyfits.open('tnsky_sub.fits')[0].data

    fig = plt.figure()
    ax = fig.add_subplot(111)

    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111)

    for data, name in zip([raw_data,old_data,MB_data], 
                          ['No flat','Nightly flat','Reconstructed flat']):
        
        x, line1, line2 = get_lines(data,20)

        ratio = line1/line2

        zeroed_ratio = ratio/ bn.nanmean(ratio[30:])
        
        ax.plot(x,ratio,label=name)
        ax1.plot(x,zeroed_ratio,label=name)

    ax.set_xlabel('$\mathrm{Slit\ position\ [px]}$')
    ax.set_ylabel('$f(4861\mathrm{nm})/f(5198\mathrm{nm})$')
    ax.legend(loc=0)
#    ax.set_ylim(0,2.5)
#    ax.set_title(datetime.now().isoformat(' '))

    ax1.set_xlabel('$\mathrm{Slit\ position\ [px]}$')
    ax1.set_ylabel('$f(4861\mathrm{nm})/f(5198\mathrm{nm})$')
    ax1.legend(loc=0,numpoints=1)
    # ax1.set_ylim(0.5,1.5)
    # ax1.set_title('Sky-subtracted object')
    # ax1.text(250,1.4,'Plot 2',fontsize=17)
    ax1.set_ylim(0.9,1.1)
    ax1.set_title('Object frame only')
    ax1.text(250,1.07,'Plot 1',fontsize=17)

#    fig.show()
    fig1.show()
    return
Example #54
File: oPCA.py Project: asaich/sima
def _method_1(data, num_pcs=None):
    """Compute OPCA when num_observations > num_dimensions."""
    data = np.nan_to_num(data - nanmean(data, axis=0))
    T = data.shape[0]
    corr_offset = np.dot(data[1:].T, data[:-1])
    corr_offset += corr_offset.T
    if num_pcs is None:
        eivals, eivects = eigh(corr_offset)
    else:
        eivals, eivects = eigsh(corr_offset, num_pcs, which='LA')
    eivals = np.real(eivals)
    eivects = np.real(eivects)
    idx = np.argsort(-eivals)  # sort the eigenvectors and eigenvalues
    eivals = eivals[idx] / (2. * (T - 1))
    eivects = eivects[:, idx]
    return eivals, eivects, np.dot(data, eivects)
Example #55
    def fit(self, X, y, mask=None):
        """Fit Gaussian Naive Bayes according to X, y

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target values.
        mask : array-like, shape = [n_samples, n_features]
            Binary, 1 at unobserved features.

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_arrays(X, y, sparse_format='dense')

        n_samples, n_features = X.shape

        if n_samples != y.shape[0]:
            raise ValueError("X and y have incompatible shapes")

        if mask is not None:
            mask = array2d(mask)
            X = X.copy()
            X[mask] = np.nan

        self.classes_ = unique_y = np.unique(y)
        n_classes = unique_y.shape[0]

        self.theta_ = np.zeros((n_classes, n_features))
        self.sigma_ = np.zeros((n_classes, n_features))
        self.class_prior_ = np.zeros(n_classes)
        self._n_ij = []
        epsilon = 1e-9
        for i, y_i in enumerate(unique_y):
            self.theta_[i, :] = bn.nanmean(X[y == y_i, :], axis=0)
            self.sigma_[i, :] = bn.nanvar(X[y == y_i, :], axis=0) + epsilon
            self.class_prior_[i] = float(np.sum(y == y_i)) / n_samples
            self._n_ij.append(-0.5 * np.sum(np.log(np.pi * self.sigma_[i, :])))
        self._logprior = np.log(self.class_prior_)
        return self
Example #56
def do_a_line(moment_list,N,line_output,monte_output):

    x, l = make_line(moment_list)
    SNRs = np.linspace(5,100,50)
    results = np.empty((SNRs.size,4,2))
    lp = PDF(line_output)

    for i, SNR in enumerate(SNRs):
        sn_res = np.empty((N,4))
        for j in range(N):
            noise = get_noise(x, l,SNR)
            ln = l + noise
            cdf = np.cumsum(ln/np.sum(ln))
            low, high = np.interp([0.01,0.99],cdf,x)
            idx = np.where((x > low) & (x <= high))
            sn_res[j] = ADE.ADE_moments(x[idx],ln[idx])

        measured_vals = bn.nanmean(sn_res,axis=0)
        measured_stds = bn.nanstd(sn_res,axis=0)
        # print sn_res
        # print measured_stds
        # raw_input('')
        results[i,:,0] = measured_vals
        results[i,:,1] = measured_stds
        
        ax = plt.figure().add_subplot(111)
        ax.set_xlabel('Velocity [km/s]')
        ax.set_ylabel('Flux')
        ax.set_title('SNR = {:5.2f}'.format(SNR))
        ax.plot(x,ln)
        lp.savefig(ax.figure)

    lp.close()
    
    mp = PDF(monte_output)
    plots = plot_results(SNRs, results, moment_list)
    for i, plot in enumerate(plots):
        if i == 2:
            plot.set_ylim(-2,2)
        if i == 3:
            plot.set_ylim(-2,5)
        mp.savefig(plot.figure)
    mp.close()
    plt.close('all')
    return SNRs, results
Example #57
File: oPCA.py Project: asaich/sima
def _method_2(data, num_pcs=None):
    """Compute OPCA when num_observations <= num_dimensions."""
    data = np.nan_to_num(data - nanmean(data, axis=0))
    T = data.shape[0]
    tmp = np.dot(data, data.T)
    corr_offset = np.zeros(tmp.shape)
    corr_offset[1:] = tmp[:-1]
    corr_offset[:-1] += tmp[1:]
    if num_pcs is None:
        eivals, eivects = eig(corr_offset)
    else:
        eivals, eivects = eigs(corr_offset, num_pcs, which='LR')
    eivals = np.real(eivals)
    eivects = np.real(eivects)
    idx = np.argsort(-eivals)  # sort the eigenvectors and eigenvalues
    eivals = eivals[idx] / (2. * (T - 1))
    eivects = eivects[:, idx]
    transformed_eivects = np.dot(data.T, eivects)
    for i in range(transformed_eivects.shape[1]):  # normalize the eigenvectors
        transformed_eivects[:, i] /= np.linalg.norm(transformed_eivects[:, i])
    return eivals, transformed_eivects, np.dot(data, transformed_eivects)
Example #58
def height_plot_across_folders(folder_list, inputsuffix='allz2.dat', 
                               label='Mean Light Weighted Age [Gyr]', 
                               col=6, errcol=None, lowhigh=False, 
                               order=5, ylims=None, bigpoints=False,
                               binz=True, combine_all=False, plot_std=False,
                               exclude=[[],[],[],[],[],[]]):

    axlist = []
    
    plist = [6,3,4,2,1,5]
    #color_list = ['blue','turquoise','chartreuse','yellow','tomato','red']
    color_list = ['blue','seagreen','darkorange','crimson','dimgray','mediumorchid','lightblue']
    style_list = ['-','-','-','-','-','-','-']

    if not isinstance(col,list):
        col = [col] * len(folder_list)

    for i in range(6):                
        pointing = plist[i]

        ax = plt.figure().add_subplot(111)
        ax.set_xlabel('|Height [kpc]|')
        ax.set_ylabel(label)
        ax.set_title('{}\nP{}'.format(time.asctime(),pointing))

        for f, folder in enumerate(folder_list):
            color = color_list[f]
            style = style_list[f]
            
            dat = glob('{}/*P{}*{}'.format(folder, pointing, inputsuffix))[0]
            print(dat)
            loc = glob('{}/*P{}*locations.dat'.format(folder, pointing))[0]
            print(loc)
            print('Excluding: ', exclude[pointing-1])
    
            if errcol is None:
                td = np.loadtxt(dat, usecols=(col[f],), unpack=True)
            else:
                if lowhigh:
                    td, low, high = np.loadtxt(dat, usecols=(col[f],errcol,errcol+1), unpack=True)
                    te = np.vstack((low,high))
                else:
                    td, te = np.loadtxt(dat, usecols=(col[f],errcol), unpack=True)                
            r, tz = np.loadtxt(loc, usecols=(4,5), unpack=True)

            exarr = np.array(exclude[pointing-1])-1 # because aps are 1-indexed
            td = np.delete(td,exarr)
            r = np.delete(r,exarr)
            tz = np.delete(tz,exarr)
            if errcol is not None:
                if lowhigh:
                    te = np.delete(te,exarr,axis=1)
                else:
                    te = np.delete(te,exarr)

            alpha=1.0
            if combine_all and f == 0:
                bigD = np.zeros(td.size)
                alpha=0.3
            
            if binz:
                z = np.array([])
                d = np.array([])
                e = np.array([])
                while tz.size > 0:
                    zi = tz[0]
                    idx = np.where(np.abs(tz - zi) < 0.05)
                    d = np.r_[d,np.mean(td[idx])]
                    e = np.r_[e,np.std(td[idx])]
                    z = np.r_[z,np.abs(zi)]
                    tz = np.delete(tz, idx)
                    td = np.delete(td, idx)
            else:
                z = tz
                d = td
                if errcol is None:
                    e = np.zeros(tz.size)
                else:
                    e = te

            if combine_all:
                bigD = np.vstack((bigD,d))
                bigz = z

            gidx = d == d
            d = d[gidx]
            z = z[gidx]
            if lowhigh:
                e = e[:,gidx]
            else:
                e = e[gidx]

            sidx = np.argsort(z)
            dp = np.r_[d[sidx][order::-1],d[sidx]]
            zp = np.r_[z[sidx][order::-1],z[sidx]]
            mean = bn.move_mean(dp,order)[order+1:]
            std = bn.move_std(dp,order)[order+1:]
            spl = spi.UnivariateSpline(z[sidx],d[sidx])
            mean = spl(z[sidx])
            # mean = np.convolve(d[sidx],np.ones(order)/order,'same')
            # std = np.sqrt(np.convolve((d - mean)**2,np.ones(order)/order,'same'))
        
            # ax.plot(z[sidx],mean,color=color, ls=style, label=folder, alpha=alpha)
            # ax.fill_between(z[sidx],mean-std,mean+std, alpha=0.1, color=color)

            # print d.shape, np.sum(e,axis=0).shape
            # d = d/np.sum(e,axis=0)
            # e = np.diff(e,axis=0)[0]
            # print e.shape

            ax.errorbar(z, d, yerr=e, fmt='.', color=color,alpha=alpha,capsize=0, label=folder)

        ax.set_xlim(-0.1,2.6)
        
        if ylims is not None:
            ax.set_ylim(*ylims)
        ax.legend(loc=0,numpoints=1)

        if combine_all:
            sidx = np.argsort(bigz)
            bigD = bigD[1:]
            bigMean = bn.nanmean(bigD,axis=0)
            bigStd = bn.nanstd(bigD,axis=0)
            bigspl = spi.UnivariateSpline(bigz[sidx],bigMean[sidx])
            bigFit = bigspl(bigz[sidx])
            
            ax.plot(bigz[sidx], bigFit, 'k-', lw=2)
            ax.errorbar(bigz, bigMean, yerr=bigStd, fmt='.', color='k',capsize=0)

        axlist.append(ax)
    
        if combine_all and plot_std:
            ax2 = plt.figure().add_subplot(111)
            ax2.set_xlabel('|Height [kpc]|')
            ax2.set_ylabel('$\delta$'+label)
            ax2.set_title(ax.get_title())
            ax2.plot(bigz, bigStd, 'k')
            axlist.append(ax2)

    return axlist
Example #59
        fs = wav_params[2]
    except IOError as e:
        print("Could not read file: %s" % e)
        sys.exit(-1)


    # cast to mono
    if len(data.shape) == 2:
        data = data.sum(axis=0)  # should this be .mean()?

    window_frames= int(fs * window_seconds)
    silence_frames = int(fs * silence_seconds)

    print "Analyzing"
    move_std = move_std(data, window=window_frames/2)
    mean_std = nanmean(move_std)

    print "move_std shape: ", move_std.shape
    print "len(data): ", len(data)

    widgets = ["Creating file", Bar(), ETA()]
    pbar = ProgressBar(widgets=widgets, maxval=len(data)).start()

    new_data = []
    silence_count = 0
    for i, d in pbar(enumerate(data)):
        new_data.append(d)
        if move_std[i] is not np.nan and move_std[i] < mean_std:
            silence_count += 1
        else:
            if window_frames < silence_count < silence_frames: