Example No. 1
def read_file ( filename ):
    """
    Lit un fichier USPS et renvoie un tableau de tableaux d'images.
    Chaque image est un tableau de nombres réels.
    Chaque tableau d'images contient des images de la même classe.
    Ainsi, T = read_file ( "fichier" ) est tel que T[0] est le tableau
    des images de la classe 0, T[1] contient celui des images de la classe 1,
    et ainsi de suite.
    """
    # lecture de l'en-tête
    infile = open ( filename, "r" )    
    nb_classes, nb_features = [ int( x ) for x in infile.readline().split() ]

    # create the data structure used to store the images:
    # it is an array of lists (1 per class)
    data = np.empty ( 10, dtype=object )   
    filler = np.frompyfunc(lambda x: list(), 1, 1)
    filler( data, data )

    # read the images from the file and sort them, class by class
    for ligne in infile:
        champs = ligne.split ()
        if len ( champs ) == nb_features + 1:
            classe = int ( champs.pop ( 0 ) )
            data[classe].append ( [ float(x) for x in champs ] )
    infile.close ()

    # convert the lists into arrays
    output  = np.empty ( 10, dtype=object )
    filler2 = np.frompyfunc(lambda x: np.asarray (x), 1, 1)
    filler2 ( data, output )

    return output
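
A minimal usage sketch (not part of the original source; the file name is hypothetical), showing how the returned object array is indexed per class:

import numpy as np

T = read_file("usps_train.txt")   # hypothetical USPS-format file
print(len(T))                     # 10 entries, one per digit class
print(T[0].shape)                 # (number of class-0 images, nb_features)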
Example No. 2
    def __init__(self, rng, nin, nout, activation=logistic, activation_prime=dlogistic, W=None, b=None, inputs=None, learningRate=.2):
        self.ActivationFn = np.frompyfunc(activation, 1, 1)
        self.DActivationFn = np.frompyfunc(activation_prime, 1, 1)

        self.inputs = []
        if inputs: self.inputs.append(inputs)
        self.activations = []
        self.outputs = []

        self.learningRate = learningRate
        self.momentumFactor = 0     # TODO: ensure momentum is still implemented correctly
        self.previousDelta = None
        self.previousbDelta = None

        #W = np.zeros((nin, nout))
        if W is None:
            self.W = np.asarray(
                rng.uniform(
                    low=-4 * np.sqrt(6.0 / (nin + nout)),     # generic range of values
                    high=4 * np.sqrt(6.0 / (nin + nout)),
                    size=(nin, nout)
                )
                , dtype=float
            )
        else:
            self.W = W
        if b is None:
            self.b = np.zeros(nout)
        else:
            self.b = b
Example No. 3
    def __init__(self, rng, nin, nout, activation=logistic, activation_prime=dlogistic, W=None, b=None, inputs=None, learningRate=.9):
        self.ActivationFn = np.frompyfunc(activation, 1, 1)
        self.DActivationFn = np.frompyfunc(activation_prime, 1, 1)
        self.inputs = inputs
        self.activations = None
        self.activationHistory = None
        self.outputs = None
        self.outputHistory = None
        self.learningRate = learningRate
        self.momentumFactor = .7
        self.previousDelta = None
        self.previousbDelta = None

        if W is None:
            self.W = np.asarray(
                rng.uniform(
                    low=-4 * np.sqrt(6.0 / (nin + nout)),     # generic range of values
                    high=4 * np.sqrt(6.0 / (nin + nout)),
                    size=(nin, nout)
                )
                , dtype=float
            )
        else:
            self.W = W
        if b is None:
            self.b = np.zeros(nout)
        else:
            self.b = b
Example No. 4
def __get_function__(astr, width):
    f = np.cos
    if astr == "cosine":
        f = np.cos
    elif astr == "rampup":
        f = np.frompyfunc(
            lambda x: w.ramp_up(x, width),
            1, 1
            )
    elif astr == "rampdown":
        f = np.frompyfunc(
            lambda x: w.ramp_down(x, width),
            1, 1
            )
    elif astr == "impulse":
        f = np.frompyfunc(
            lambda x: w.impulse(x, width),
            1, 1
            )
    elif astr == "step":
        f = np.frompyfunc(
            lambda x: w.step(x, width),
            1, 1
            )
    else:
        f = np.cos
    return f
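
A brief usage sketch (not from the original source): the "cosine" branch returns np.cos directly, while the frompyfunc-built branches return object-dtype arrays that usually need an explicit cast back to float.

import numpy as np

f = __get_function__("cosine", width=0.5)   # width is unused by the cosine branch
t = np.linspace(0.0, 1.0, 5)
y = np.asarray(f(t), dtype=float)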
Example No. 5
    def test_frompyfunc_2d_sig(self):
        import sys
        from numpy import frompyfunc, dtype, arange

        if "__pypy__" not in sys.builtin_module_names:
            skip("PyPy only frompyfunc extension")

        def times_2(in_array, out_array):
            assert len(in_array.shape) == 2
            assert in_array.shape == out_array.shape
            out_array[:] = in_array * 2

        ufunc = frompyfunc(
            [times_2], 1, 1, signature="(m,n)->(n,m)", dtypes=[dtype(int), dtype(int)], stack_inputs=True
        )
        ai = arange(18, dtype=int).reshape(2, 3, 3)
        ai3 = ufunc(ai[0, :, :])
        ai2 = ufunc(ai)
        assert (ai2 == ai * 2).all()

        ufunc = frompyfunc(
            [times_2], 1, 1, signature="(m,m)->(m,m)", dtypes=[dtype(int), dtype(int)], stack_inputs=True
        )
        ai = arange(12 * 3 * 3, dtype="int32").reshape(12, 3, 3)
        exc = raises(ValueError, ufunc, ai[:, :, 0])
        assert "perand 0 has a mismatch in its core dimension 1" in exc.value.message
        ai3 = ufunc(ai[0, :, :])
        ai2 = ufunc(ai)
        assert (ai2 == ai * 2).all()
        # view
        aiV = ai[::-2, :, :]
        assert aiV.strides == (-72, 12, 4)
        ai2 = ufunc(aiV)
        assert (ai2 == aiV * 2).all()
Example No. 6
    def plot_time_function(self, p):
        """Plot the time function.
        """
        n_steps = self.n_steps
        mats = self.mats
        step_size = self.step_size

        ls_t = linspace(0, step_size * n_steps, n_steps + 1)
        ls_fn = frompyfunc(self.time_function, 1, 1)
        ls_v = ls_fn(ls_t)

        p.subplot(321)
        p.plot(ls_t, ls_v, "ro-")

        final_epsilon = self.final_displ / self.length

        kappa = linspace(mats.epsilon_0, final_epsilon, 10)
        omega_fn = frompyfunc(lambda kappa: mats._get_omega(None, kappa), 1, 1)
        omega = omega_fn(kappa)
        kappa_scaled = step_size + (1 - step_size) * (kappa - mats.epsilon_0) / (final_epsilon - mats.epsilon_0)
        xdata = hstack([array([0.0], dtype=float), kappa_scaled])
        ydata = hstack([array([0.0], dtype=float), omega])
        p.plot(xdata, ydata, "g")
        p.xlabel("regular time [-]")
        p.ylabel("scaled time [-]")
Example No. 7
def mov_average_expw(data, span, tol=1e-6):
    """Calculates the exponentially weighted moving average of a series.

:Parameters:
    $$data$$
    span : int 
        Time periods. The smoothing factor is 2/(span + 1)
    tol : float, *[1e-6]*
        Tolerance for the definition of the mask. When data contains masked 
        values, this parameter determines what points in the result should be masked.
        Values in the result that would not be "significantly" impacted (as 
        determined by this parameter) by the masked values are left unmasked."""

    data = marray(data, copy=True, subok=True)
    ismasked = (data._mask is not nomask)
    data._mask = N.zeros(data.shape, bool_)
    _data = data._data
    #
    k = 2./float(span + 1)
    def expmave_sub(a, b):
        return a + k * (b - a)
    #
    data._data.flat = N.frompyfunc(expmave_sub, 2, 1).accumulate(_data)
    if ismasked:
        _unmasked = N.logical_not(data._mask).astype(float_)
        marker = 1. - N.frompyfunc(expmave_sub, 2, 1).accumulate(_unmasked)
        data._mask[marker > tol] = True
    data._mask[0] = True
    #
    return data
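
For reference (this note is not part of the original snippet), the accumulate call over expmave_sub implements the usual exponential smoothing recurrence with smoothing factor k = 2 / (span + 1):

    y[0] = x[0],    y[t] = y[t-1] + k * (x[t] - y[t-1]) = (1 - k) * y[t-1] + k * x[t]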
Example No. 8
    def test_frompyfunc_sig_broadcast(self):
        import sys
        from numpy import frompyfunc, dtype, arange

        if "__pypy__" not in sys.builtin_module_names:
            skip("PyPy only frompyfunc extension")

        def sum_along_0(in_array, out_array):
            out_array[...] = in_array.sum(axis=0)

        def add_two(in0, in1, out):
            out[...] = in0 + in1

        ufunc_add = frompyfunc(
            add_two,
            2,
            1,
            signature="(m,n),(m,n)->(m,n)",
            dtypes=[dtype(int), dtype(int), dtype(int)],
            stack_inputs=True,
        )
        ufunc_sum = frompyfunc(
            [sum_along_0], 1, 1, signature="(m,n)->(n)", dtypes=[dtype(int), dtype(int)], stack_inputs=True
        )
        ai = arange(18, dtype=int).reshape(3, 2, 3)
        aout = ufunc_add(ai, ai[0, :, :])
        assert aout.shape == (3, 2, 3)
        aout = ufunc_sum(ai)
        assert aout.shape == (3, 3)
Example No. 9
    def polar_workspace_init(self, radial_bins=256, angular_bins=256,
                             max_radius=None, centre=None):
        #if (centre == None) and self.centre == None:
            #pass # Raise an exception

        xdim = self.image.shape[0]
        ydim = self.image.shape[1]

        if centre is None:
            xc = xdim * 0.5
            yc = ydim * 0.5 
        else:
            xc = centre[0]
            yc = centre[1]

        # Calculate minimum distance from centre to edge of image - this
        # determines the maximum radius in the polar image
        xsize = min (xdim + 0.5 - xc, xc)
        ysize = min (ydim + 0.5 - yc, yc)
        max_rad = m.sqrt(xsize**2 + ysize**2)

        if max_radius is None:
            max_radius = max_rad
        elif max_radius > max_rad:
            raise ValueError
        
        # Set up interpolation - cubic spline with no smoothing by default 
        x = numpy.arange(xdim) + 0.5 - xc
        y = numpy.arange(ydim) + 0.5 - yc
        interp = spint.RectBivariateSpline(x, y, self.image)

        # Polar image bin widths
        theta_bin_width = (2.0 * math.pi) / (angular_bins - 1.0)
        radial_bin_width = max_radius / (radial_bins - 1.0)

        # Calculate polar image values - use vectorization for efficiency
        # Because we broadcast when using a ufunc (created by frompyfunc
        # below), we could get away with an ogrid here to save time and space?
        r, theta = numpy.mgrid[0:radial_bins, 0:angular_bins]
        theta = (theta + 0.5) * theta_bin_width
        r = (r + 0.5) * radial_bin_width

        def polar_pix_val(r, theta):
            # Should we use the numpy.sin/cos functions here for more
            # efficiency ?
            return interp.ev(r * m.sin(theta), r * m.cos(theta))

        polar_pix_val_ufunc = numpy.frompyfunc(polar_pix_val, 2, 1)
        self.pimage = polar_pix_val_ufunc(r, theta)

        # Calculate polar image values - non-vectorized version
        self.pimage = numpy.empty((radial_bins, angular_bins))
        for r in range(radial_bins):
            R = (r + 0.5) * radial_bin_width
            for t in range(angular_bins):
                theta = (t + 0.5) * theta_bin_width
                x = R * m.sin(theta)
                y = R * m.cos(theta)
                self.pimage[r, t] = interp.ev(x, y)
Example No. 10
def Model2a():
    """
    A version of Model3 where the activation functions are generated
    using a different numpy function. For exploratory reasons only.
    """
    m2a=Model2()
    sn = m2a.GetGroupByName("SN")
    sp = m2a.GetGroupByName("SP")
    sn.SetActivationFunction(np.frompyfunc(lambda x: neural.STanh_plus(x, gain=2), 1, 1))
    sp.SetActivationFunction(np.frompyfunc(lambda x: neural.STanh_plus(x, gain=2), 1, 1))
Example No. 11
    def redraw(self, e = None):
        if ((self.idx_x < 0 and len(self.idx_x_arr) == 0) or
             (self.idx_y < 0 and len(self.idx_y_arr) == 0) or
             self._xdata == [] or
             self._ydata == []):
            return
        #
        if len(self.idx_x_arr) > 0:
            print 'x: summation for', self.idx_x_arr
            xarray = np.array(self._xdata)[:, self.idx_x_arr].sum(1)
        else:
            xarray = np.array(self._xdata)[:, self.idx_x]

        if len(self.idx_y_arr) > 0:
            print 'y: summation for', self.idx_y_arr
            yarray = np.array(self._ydata)[:, self.idx_y_arr].sum(1)

#            print 'yarray', yarray
#            yarray_arr = array( self._ydata )[:, self.idx_y_arr]
#            sym_weigth_arr = 2. * ones_like( yarray_arr[1] )
#            sym_weigth_arr[0] = 4.
#            print 'yarray_arr', yarray_arr
#            print 'sym_weigth_arr', sym_weigth_arr
#            yarray = dot( yarray_arr, sym_weigth_arr )
#            print 'yarray', yarray


        else:
            yarray = np.array(self._ydata)[:, self.idx_y]

        if self.transform_x:
            def transform_x_fn(x):
                '''makes a callable function out of the Str-attribute
                "transform_x". The vectorised version of this function is 
                then used to transform the values in "xarray". Note that
                the function defined in "transform_x" must be defined in
                terms of a lower case variable "x".
                '''
                return eval(self.transform_x)
            xarray = np.frompyfunc(transform_x_fn, 1, 1)(xarray)

        if self.transform_y:
            def transform_y_fn(y):
                '''makes a callable function out of the Str-attribute
                "transform_y". The vectorised version of this function is 
                then used to transform the values in "yarray". Note that
                the function defined in "transform_y" must be defined in
                terms of a lower case variable "y".
                '''
                return eval(self.transform_y)
            yarray = np.frompyfunc(transform_y_fn, 1, 1)(yarray)

        self.trace.xdata = np.array(xarray)
        self.trace.ydata = np.array(yarray)
        self.trace.data_changed = True
Example No. 12
    def _get_values(self):
        l_rho = self.l_rho
        n_points = self.n_points
        gl_b = self._get_gbundle_props()[0]
        gmu_b = self._get_gbundle_props()[1]
        m_f = self.m_f
        mu_r = self.mu_r
        l_r = self.l_r

        # for Gaussian bundle strength distribution 
        if self.l_plot <= gl_b:
            gl_arr = logspace( log( self.min_plot_length,10 ), log(gl_b,10), n_points )
            gstrength_arr = self.fl(gl_arr)/self.fl(self.l_r)*self.mu_r
        elif self.l_plot > gl_b:
            gl_1 = logspace( log( self.min_plot_length,10), log(gl_b,10), n_points )
            gl_2 = logspace( log( gl_b, 10 ), log( self.l_plot, 10 ), n_points )
            gl_arr = hstack( (gl_1, gl_2) )
            gstrength_1 = self.fl( gl_1 ) / self.fl( self.l_r ) * self.mu_r
            gstrength_22 = frompyfunc( self._get_gstrength, 1, 1 )
            gstrength_2 = array( gstrength_22( gl_2 ), dtype = 'float64' )
            gstrength_arr = hstack( ( gstrength_1, gstrength_2 ) )
        # Mirek's mean approximation
            strength_22 = frompyfunc( self.mean_approx, 1, 3 )
            strength_2 = array( strength_22( gl_2 )[0], dtype = 'float64' )
            mean_gumb = array( strength_22( gl_2 )[1], dtype = 'float64' )
            med_gumb = array( strength_22( gl_2 )[2], dtype = 'float64' )

        #asymptotes for the first two branches
        if self.l_plot <= l_rho:
            al_arr = array([self.min_plot_length, self.l_plot])
            astrength_arr = array([mu_r / self.fl(l_r), mu_r / self.fl(l_r)])
        elif l_rho < self.l_plot:
            al_arr = array([self.min_plot_length, l_rho, 10. * gl_b])
            astrength_1 = mu_r / self.fl(l_r)
            astrength_2 = (l_rho/al_arr[2])**(1/m_f) * astrength_1
            astrength_arr = hstack((astrength_1,astrength_1,astrength_2))
        
        # left asymptote
        self.mu_sigma_0 = astrength_arr[0]
            
        # standard deviation for the first branch = before fragmentation
        if self.l_plot <= gl_b:
            stl_arr = logspace( log( self.min_plot_length,10) , log(self.l_plot,10) , 
                                n_points / 2. )
            stdev_arr_plus  = self.fl( stl_arr ) / self.fl( l_r ) * mu_r * (1 + self.cov)
            stdev_arr_minus = self.fl( stl_arr ) / self.fl( l_r ) * mu_r * (1 - self.cov)
        else:
            stl_arr = logspace( log( self.min_plot_length,10), log(gl_b,10), n_points)
            stdev_arr_plus = self.fl( stl_arr )/self.fl( l_r ) * mu_r * (1 + self.cov)
            stdev_arr_minus = self.fl( stl_arr )/self.fl( l_r ) * mu_r * (1 - self.cov)        
        
        return gl_arr, al_arr, gstrength_arr, astrength_arr,\
               stl_arr, stdev_arr_plus, stdev_arr_minus, gl_2,\
               strength_2, mean_gumb, med_gumb
Example No. 13
 def __init__(self, mu, alpha):
     self.map_pdf = {}
     self.map_logpdf = {}
     self.bins = []
     mu = float(mu)
     
     self.alpha = alpha
     self.mu = mu
     
     self.nbin = np.frompyfunc(self._get_value, 3, 1)
     self.nbin_log = np.frompyfunc(self._get_value_log, 3, 1)
 def _refresh_fired(self):
     xdata = linspace(0,10,10)
     fneval1 = frompyfunc( lambda x: eval( self.expression1 ), 1, 1 )
     fneval2 = frompyfunc( lambda x: eval( self.expression2 ), 1, 1 )
     fneval3 = frompyfunc( lambda x: eval( self.expression3 ), 1, 1 )
     y1 = fneval1( xdata )
     y2 = fneval2( xdata )
     y3 = fneval3( xdata )
     ydata = column_stack((y1,y2,y3))
     self.mfn.set( xdata = xdata, ydata = ydata )
     self.mfn.data_changed = True
Example No. 15
 def add_dates(df):
     fun_em = np.frompyfunc(get_date_em, 2, 1)
     fun_lig = np.frompyfunc(get_date_lig, 2, 1)
     df['date_emergence_leaf'] = fun_em(df['num_leaf_bottom'], df['fnl'])
     df['date_ligulation_leaf'] = fun_lig(df['num_leaf_bottom'], df['fnl'])
     if force_mean_fnl==False:
         df['date_emergence_flag_leaf'] = map(lambda fnl: hs_fit.TTemleaf(fnl, nff=fnl), df['fnl'])
         df['date_ligulation_flag_leaf'] = map(lambda fnl: hs_fit.TTligleaf(fnl, nff=fnl), df['fnl'])
     else:
         df['date_emergence_flag_leaf'] = hs_fit.TTemleaf(hs_fit.mean_nff, nff=None)[0]
         df['date_ligulation_flag_leaf'] = hs_fit.TTligleaf(hs_fit.mean_nff, nff=None)[0]
     return df
Example No. 16
 def _get_mfn_plot(self):
     n_points = 100
     sigma_max = self.sigma_fu * self.rho
     
     sigma_arr = linspace( 0, sigma_max, n_points )
     
     get_epsilon_f = frompyfunc( lambda sigma: sigma / self.E_f, 1, 1 ) 
     epsilon_f_arr  = get_epsilon_f( sigma_arr )
     
     get_epsilon_c = frompyfunc( self._get_epsilon_c, 1, 1 )
     epsilon_c_arr = get_epsilon_c( sigma_arr )
     
     return MFnLineArray( xdata = epsilon_c_arr, ydata = sigma_arr )
 def _refresh_fired(self):
     
     #creates an empty plot data container as a list of MFnLineArray classes
     self.mfn.lines = self.No_of_all_curves * [MFnLineArray()]
     
     xdata = linspace(0,10,100)
     fneval1 = frompyfunc( lambda x: eval( self.expression1 ), 1, 1 )
     fneval2 = frompyfunc( lambda x: eval( self.expression2 ), 1, 1 )
     fneval3 = frompyfunc( lambda x: eval( self.expression3 ), 1, 1 )
     self.mfn.lines[0] = MFnLineArray(xdata = xdata, ydata = fneval1( xdata ))
     self.mfn.lines[1] = MFnLineArray(xdata = xdata, ydata = fneval2( xdata ))
     self.mfn.lines[2] = MFnLineArray(xdata = xdata, ydata = fneval3( xdata ))
     self.mfn.data_changed = True
Example No. 18
    def std_of_flow_time_to_stream(streamlink, flow_dir_file, slope, radius, velocity, delta_s_file,
                                   flow_dir_code='TauDEM'):
        """Generate standard deviation of t0_s (flow time to the workflow channel from each cell).
        """
        strlk_r = RasterUtilClass.read_raster(streamlink)
        strlk_data = strlk_r.data
        rad_data = RasterUtilClass.read_raster(radius).data
        slo_data = RasterUtilClass.read_raster(slope).data

        vel_r = RasterUtilClass.read_raster(velocity)
        vel_data = vel_r.data
        xsize = vel_r.nCols
        ysize = vel_r.nRows
        nodata_value = vel_r.noDataValue

        def initial_variables(vel, strlk, slp, rad):
            """initial variables"""
            if abs(vel - nodata_value) < UTIL_ZERO:
                return DEFAULT_NODATA
            if strlk <= 0:
                tmp_weight = 1
            else:
                tmp_weight = 0
            # 0 is river
            if slp < 0.0005:
                slp = 0.0005
            # dampGrid = vel * rad / (slp / 100. * 2.) # No need to divide 100
            # in my view. By LJ
            damp_grid = vel * rad / (slp * 2.)
            celerity = vel * 5. / 3.
            tmp_weight *= damp_grid * 2. / numpy.power(celerity, 3.)
            return tmp_weight

        initial_variables_numpy = numpy.frompyfunc(initial_variables, 4, 1)
        weight = initial_variables_numpy(vel_data, strlk_data, slo_data, rad_data)

        delta_s_sqr = TerrainUtilClass.calculate_flow_length(flow_dir_file, weight, flow_dir_code)

        def cal_delta_s(vel, sqr):
            """Calculate delta s"""
            if abs(vel - nodata_value) < UTIL_ZERO:
                return nodata_value
            else:
                return sqrt(sqr) / 3600.

        cal_delta_s_numpy = numpy.frompyfunc(cal_delta_s, 2, 1)
        delta_s = cal_delta_s_numpy(vel_data, delta_s_sqr)

        RasterUtilClass.write_gtiff_file(delta_s_file, ysize, xsize, delta_s, strlk_r.geotrans,
                                         strlk_r.srs, DEFAULT_NODATA, GDT_Float32)
Example No. 19
def plotFromFile(fileName):  
    stats=pickle.load(open(fileName,'rb'))
    energies=stats['energy']
    nRepeat=len(stats['energy'][0])
    nParam=len(stats['stepSize'])
    
    filler = np.frompyfunc(lambda x: list(), 1, 1)
    pos=np.empty([nParam,nRepeat],dtype=np.object)
    filler(pos,pos)
    time=np.empty([nParam,nRepeat],dtype=np.object)
    time=filler(time,time)

    
    defaultPos=-stats['initParams']['N']/5    
    
    for paramIdx in xrange(nParam):    
        for repeat in xrange(nRepeat):
            nPts=len(stats['energy'][paramIdx][repeat])
            for ptIdx in xrange(nPts):
#                pdb.set_trace()
                fila=filament(stats['states'][paramIdx][repeat][ptIdx],stats['initParams'],defaultPos)
                filapos=filamentPos(fila,stats['initParams']['N'])
#                if paramIdx==0 and ptIdx>22: pdb.set_trace()
                pos[paramIdx][repeat].append(filapos)
                time[paramIdx][repeat].append(stats['obsStart']+stats['stepSize'][paramIdx]*ptIdx)
Example No. 20
    def test_apply_ufunc(self):
        a = self.cube
        a.units = iris.unit.Unit('meters')

        b = iris.analysis.maths.apply_ufunc(np.square, a,
                new_name='more_thingness', new_unit=a.units**2, in_place=False)

        ans = a.data**2

        self.assertArrayEqual(b.data, ans)
        self.assertEqual(b.name(), 'more_thingness')
        self.assertEqual(b.units, iris.unit.Unit('m^2'))

        def vec_mag(u, v):
            return math.sqrt(u**2 + v**2)

        c = a.copy() + 2

        vec_mag_ufunc = np.frompyfunc(vec_mag, 2, 1)
        b = iris.analysis.maths.apply_ufunc(vec_mag_ufunc, a, c)

        ans = a.data**2 + c.data**2
        b2 = b**2

        self.assertArrayAlmostEqual(b2.data, ans)
Example No. 21
def computeLaplaceMatrix(sqdist, t, logeps=mp.mpfr("-10")):
    """
    Compute heat approximation to Laplacian matrix using logarithms and gmpy2.

    Use mpfr to gain more precision.

    This is slow, but more accurate.

    Cutoff for really small values, and row/column elimination if degenerate.
    """
    # cutoff ufunc
    cutoff = np.frompyfunc((lambda x: mp.inf(-1) if x < logeps else x), 1, 1)

    t2 = mp.mpfr(t)
    lt = mp.log(2 / t2)
    d = to_mpfr(sqdist)
    L = d * d
    L /= -2 * t2
    cutoff(L, out=L)
    logdensity = logsumexp(L)
    L = exp(L - logdensity[:, None] + lt)
    L[np.diag_indices(len(L))] -= 2 / t2
    L = np.array(to_double(L), dtype=float)
    # if just one nonzero element, then erase row and column
    degenerate = np.sum(L != 0.0, axis=1) <= 1
    L[:, degenerate] = 0
    L[degenerate, :] = 0
    return L
Example No. 22
    def pi0_spectrum(self,dNdp_p, p_p,E_g,n_H=1.):
        #print 'dNdp: ',dNdp_p
        #print 'E_g : ',E_g
        kappa_pi = 0.17
        dp_p = p_p[1:] - p_p[:-1]
        dNdp_p = np.sqrt(dNdp_p[1:] * dNdp_p[:-1])

        step = lambda x: (1. + np.sign(x))/2.
        positive = lambda x: x * step(x)

        E_p = np.sqrt(p_p**2 + const.MP_GeV**2)
        E_p = np.sqrt(E_p[1:] * E_p[:-1])
        E_kin = E_p - const.MP_GeV
        kin_mask = step(E_kin**2 / kappa_pi**2 - const.MPi0_GeV**2)

        p_pi = np.sqrt(positive(E_kin**2 / kappa_pi**2 - const.MPi0_GeV**2)) + const.epsilon

        def EdNdE_gamma(E_g):
            E_pi_min = E_g + const.MPi0_GeV**2 / (4 * E_g)
            E_p_min =  const.MP_GeV+ E_pi_min / kappa_pi
            dNpi = const.C*const.m2cm* n_H * crs.sigma_pp(E_kin) * dNdp_p * dp_p
            mask = kin_mask * step(E_p - E_p_min)
            return E_g * 2 * np.sum(dNpi / p_pi * mask)
        EdNdE_gamma_vec = np.frompyfunc(EdNdE_gamma, 1, 1)
        return EdNdE_gamma_vec(E_g)
Example No. 23
    def IC_spec_gamma(self,EdNdE_ph,Eph,EdNdE_e,Ee,en):
       
        dlogE_e = np.log(Ee[1] / Ee[0]) * np.ones_like(Ee)#assumes log spacing of energy bins
        dNe = EdNdE_e * dlogE_e
        #print 'TEST: dlogE_e in IC: ',dlogE_e
        #print 'TEST: Ee[3]/Ee[2]: ',np.log(Ee[3]/Ee[2])
        #print 'TEST: Ee[6]/Ee[5]: ',np.log(Ee[6]/Ee[5])

        dLogE_ph = np.log(Eph[1] / Eph[0]) * np.ones_like(Eph)
        dN_ph = EdNdE_ph * dLogE_ph #changed
        '''print 'dNe: ',dNe
        print 'TEST: Eph: ',Eph
        print 'TEST: Ee: ',Ee
        print 'TEST: en: ',en
        print 'TEST: dLogE_ph: ',dLogE_ph
        print 'TEST: Eph[1]/Eph[0]: ',np.log(Eph[1]/Eph[0])
        print 'TEST: Eph[3]/Eph[2]: ',np.log(Eph[3]/Eph[2])
        print 'TEST: Eph[10]/Eph[9]: ',np.log(Eph[10]/Eph[9])
        print 'TEST: dN_ph: ',dN_ph'''
        
        def EdNdE_gamma(x):
            sigma=crs.sigma_ic(x,Eph,Ee)
            return const.C*const.m2cm*np.dot(dN_ph,np.dot(sigma,dNe))
        
        csec_vec = np.frompyfunc(EdNdE_gamma, 1, 1)
        return csec_vec(en)
Example No. 24
 def __init__(self, func, nin, nout):
     self._ufunc = np.frompyfunc(func, nin, nout)
     self._func = func
     self.nin = nin
     self.nout = nout
     self._name = funcname(func)
     self.__name__ = 'frompyfunc-%s' % self._name
Example No. 25
    def process_source_data(self):
        '''read in the measured data from file and assign
        attributes after array processing.        
        '''
        super(ExpBT3PT, self).process_source_data()


        elastomer_path = os.path.join(simdb.exdata_dir, 'bending_tests', 'three_point', '2011-06-10_BT-3PT-12c-6cm-0-TU_ZiE', 'elastomer_f-w.raw')
        _data_array_elastomer = loadtxt_bending(elastomer_path)

        # force [kN]:
        #
        xdata = -0.001 * _data_array_elastomer[:, 2].flatten()

        # displacement [mm]:
        #
        ydata = -1.0 * _data_array_elastomer[:, 0].flatten()

        mfn_displacement_elastomer = MFnLineArray(xdata = xdata, ydata = ydata)
        displacement_elastomer_vectorized = frompyfunc(mfn_displacement_elastomer.get_value, 1, 1)

        # convert data from 'N' to 'kN' and change sign
        #
        self.F = -0.001 * self.F

        # change sign in positive values for vertical displacement [mm]
        #
        self.w = -1.0 * self.w

        # subtract the deformation of the elastomer cushion between the cylinder
        # 
        self.w = self.w - displacement_elastomer_vectorized(self.F)
Example No. 26
    def _decorator(func):
        '''
        Decorates a function, making it a Numpy ufunc.

        '''
        uf = numpy.frompyfunc(func, nin, nout)
        return bind_kwargs(uf, dtype=dtype)
Example No. 27
def test():
    import numpy as np
    u = Units()

    i = u.info("sys_beam_rss")

    conv = np.frompyfunc(i["convert"],1,1)
Example No. 28
    def test_frompyfunc_fortran(self):
        import sys
        import numpy as np

        if "__pypy__" not in sys.builtin_module_names:
            skip("PyPy only frompyfunc extension")

        def tofrom_fortran(in0, out0):
            out0[:] = in0.T

        def lapack_like_times2(in0, out0):
            a = np.empty(in0.T.shape, in0.dtype)
            tofrom_fortran(in0, a)
            a *= 2
            tofrom_fortran(a, out0)

        times2 = np.frompyfunc(
            [lapack_like_times2],
            1,
            1,
            signature="(m,n)->(m,n)",
            dtypes=[np.dtype(float), np.dtype(float)],
            stack_inputs=True,
        )
        in0 = np.arange(3300, dtype=float).reshape(100, 33)
        out0 = times2(in0)
        assert out0.shape == in0.shape
        assert (out0 == in0 * 2).all()
Example No. 29
    def test_ifunc(self):
        a = self.cube

        my_ifunc = iris.analysis.maths.IFunc(np.square,
                   lambda a: a.units**2
                   )
        b = my_ifunc(a, new_name='squared temperature', in_place=False)

        self.assertCMLApproxData(a, ('analysis', 'apply_ifunc_original.cml'))
        self.assertCMLApproxData(b, ('analysis', 'apply_ifunc.cml'))

        b = my_ifunc(a, new_name='squared temperature', in_place=True)

        self.assertCMLApproxData(b, ('analysis', 'apply_ifunc.cml'))
        self.assertCMLApproxData(a, ('analysis', 'apply_ifunc.cml'))

        def vec_mag(u, v):
            return math.sqrt(u**2 + v**2)

        c = a.copy() + 2

        vec_mag_ufunc = np.frompyfunc(vec_mag, 2, 1)
        my_ifunc = iris.analysis.maths.IFunc(vec_mag_ufunc,
                   lambda a, b: (a + b).units)

        b = my_ifunc(a, c)
        self.assertCMLApproxData(b, ('analysis', 'apply_ifunc_frompyfunc.cml'))
def draw_mandelbrot(cx, cy, d, N=200):
    """
    Plot the Mandelbrot set over the range ±d around the point (cx, cy).
    """
    global mandelbrot
    x0, x1, y0, y1 = cx-d, cx+d, cy-d, cy+d 
    y, x = np.ogrid[y0:y1:N*1j, x0:x1:N*1j]
    c = x + y*1j
    mand = np.frompyfunc(iter_point,1,1)(c).astype(np.float)
    smooth_mand = np.frompyfunc(smooth_iter_point,1,1)(c).astype(np.float)
    pl.subplot(121)
    pl.gca().set_axis_off()
    pl.imshow(mand, cmap=cm.Blues_r, extent=[x0,x1,y0,y1])
    pl.subplot(122)    
    pl.imshow(smooth_mand, cmap=cm.Blues_r, extent=[x0,x1,y0,y1])
    pl.gca().set_axis_off()
Example No. 31
def generate_stoc_allows_column(indicators, sr_trend):
    ''' Return a column indicating whether the stochastic (stoc) takes values that follow the trend '''
    stod = indicators['stoD_3']
    stosd = indicators['stoSD_3']
    column_generator = np.frompyfunc(stoc_allows_entry, 3, 1)
    return column_generator(stod, stosd, sr_trend)
Example No. 32
#!/usr/bin/python

import numpy


def ultimate_answer(a):
    result = numpy.zeros_like(a)
    result.flat = 42

    return result


ufunc = numpy.frompyfunc(ultimate_answer, 1, 1)
print "The answer", ufunc(numpy.arange(4))

print "The answer", ufunc(numpy.arange(4).reshape(2, 2))
Example No. 33
"""
demo03_vectorize.py  函数矢量化
"""
import numpy as np
import math as m


def foo(x, y):
    return m.sqrt(x**2 + y**2)


x, y = 3, 4
print(foo(x, y))
x, y = np.array([3, 4, 5]), np.array([4, 5, 6])
# vectorize the foo function; np.vectorize returns the vectorized function
foo_vec = np.vectorize(foo)
print(foo_vec(x, y))
print(np.vectorize(foo)(x, y).dtype)

# frompyfunc
foo_func = np.frompyfunc(foo, 2, 1)
print(foo_func(x, y).dtype)
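
A small follow-up note (not part of the original demo): np.frompyfunc always produces object-dtype output, so an explicit cast is needed to get a numeric array back, whereas np.vectorize can infer a numeric dtype.

result = foo_func(x, y)            # object-dtype array
print(result.astype(float).dtype)  # float64 after the cast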
Example No. 34
def least_squares_xeb_fidelity_from_probabilities(
    hilbert_space_dimension: int,
    observed_probabilities: Sequence[Sequence[float]],
    all_probabilities: Sequence[Sequence[float]],
    observable_from_probability: Optional[Callable[[float], float]] = None,
    normalize_probabilities: bool = True,
) -> Tuple[float, List[float]]:
    """Least squares fidelity estimator with observable based on probabilities.

    Using the notation from the docstring of
    `least_squares_xeb_fidelity_from_expectations`, this function computes the
    least squares fidelity estimate when the observable O_U has eigenvalue
    corresponding to the computational basis state |z⟩ given by g(p(z)), where
    p(z) = |⟨z|𝜓_U⟩|^2 and g is a function that can be specified. By default,
    g is the identity function, but other choices, such as the logarithm, are
    useful. By default, the probability p(z) is actually multiplied by the
    Hilbert space dimension D, so that the observable is actually g(D * p(z)).
    This behavior can be disabled by setting `normalize_probabilities` to
    False.

    Args:
        hilbert_space_dimension: Dimension of the Hilbert space on which
           the channel whose fidelity is being estimated is defined.
        observed_probabilities: Ideal probabilities of bitstrings observed in
            experiments. A list of lists, where each inner list contains the
            probabilities for a single circuit.
        all_probabilities: Ideal probabilities of all possible bitstrings.
            A list of lists, where each inner list contains the probabilities
            for a single circuit, and should have length equal to the Hilbert
            space dimension. The order of the lists should correspond to that
            of `observed_probabilities`.
        observable_from_probability: Function that computes the observable from
            a given probability.
        normalize_probabilities: Whether to multiply the probabilities by the
            Hilbert space dimension before computing the observable.

    Returns:
        A tuple of two values. The first value is the estimated fidelity.
        The second value is a list of the residuals

            f (e_U - u_U) - (m_U - u_U)

        of the least squares minimization.
    """
    if not isinstance(observable_from_probability, np.ufunc):
        if observable_from_probability is None:
            observable_from_probability = lambda p: p
        else:
            observable_from_probability = np.frompyfunc(
                observable_from_probability, 1, 1)
    observable_from_probability = cast(Callable, observable_from_probability)
    measured_expectations = []
    exact_expectations = []
    uniform_expectations = []
    prefactor = hilbert_space_dimension if normalize_probabilities else 1.0
    for observed_probs, all_probs in zip(observed_probabilities,
                                         all_probabilities):
        observed_probs = np.array(observed_probs)
        all_probs = np.array(all_probs)
        observable = observable_from_probability(prefactor *
                                                 cast(np.ndarray, all_probs))
        measured_expectations.append(
            np.mean(
                observable_from_probability(prefactor *
                                            cast(np.ndarray, observed_probs))))
        exact_expectations.append(np.sum(all_probs * observable))
        uniform_expectations.append(
            np.sum(observable) / hilbert_space_dimension)
    return least_squares_xeb_fidelity_from_expectations(
        measured_expectations, exact_expectations, uniform_expectations)
Example No. 35
    def _get_ip_ls_values(self):
        # TODO: define the interaction with ls
        ls_fn = frompyfunc(self.sdomain.ls_fn_X, 2, 1)

        X, Y, Z = self.ip_X.T  # 3d coords - vtk
        return ls_fn(X, Y).flatten()
Example No. 36
 def update_acc(self, func):
     ufunc = np.frompyfunc(func, 3, 1)
     self.avec = ufunc(self.jvec, self.jvec_old, self.avec)
     self.jvec_old = np.copy(self.jvec)
Example No. 37
from nengo.utils.numpy import norm

from hunse_thesis.neurons import static_f_df
from hunse_thesis.offline_learning import (Network, BPLearner, BPLocalLearner,
                                           FASkipLearner, squared_cost,
                                           rms_error, make_flat_batch_fn)
from hunse_thesis.utils import initial_weights, initial_w

sns.set_style('white')
sns.set(context='paper', style='ticks', palette='dark')

# rng = np.random
rng = np.random.RandomState(9)

xor = np.frompyfunc(lambda x, y: 0.5 * (np.abs(x + y) - np.abs(x - y)), 2, 1)


def xor_reduce(a, **kwargs):
    return xor.reduce(a, **kwargs).astype(a.dtype)


def weight_norm_s(weights):
    return ", ".join("||W%d|| = %0.3f" % (i, norm(w))
                     for i, w in enumerate(weights))


def binary_classification_error(y, ystar):
    return np.sign(y) != ystar

Example No. 38
import sys

sys.path.insert(0, LOCATION)
from mpmath import gammainc as _mp_ginc

from hmf.integrate_hmf import hmf_integral_gtm


def _flt(a):
    try:
        return a.astype('float')
    except AttributeError:
        return float(a)


_ginc_ufunc = np.frompyfunc(lambda z, x: _mp_ginc(z, x), 2, 1)


def gammainc(z, x):
    return _flt(_ginc_ufunc(z, x))


class TestAnalyticIntegral(object):
    def __init__(self):
        pass

    def tggd(self, m, loghs, alpha, beta):
        return beta * (m / 10**loghs)**alpha * np.exp(-(m / 10**loghs)**beta)

    def anl_int(self, m, loghs, alpha, beta):
        return 10**loghs * gammainc((alpha + 1) / beta, (m / 10**loghs)**beta)
Example No. 39
    if threshold == 'auto':
        threshold = n_blocks / (n_blocks + 1.0) + 1.0e-8
    weights = [(word, score) for (word, score) in zip(vocab, h) if score > threshold]
    weights.sort(key=lambda x: -x[1])
    result = weights if scores else [word for (word, score) in weights]
    if not (scores or split):
        result = '\n'.join(result)
    return result


def __log_combinations_inner(n, m):
    """Calculates the logarithm of n!/m!(n-m)!"""
    return -(numpy.log(n + 1) + scipy.special.betaln(n - m + 1, m + 1))


__log_combinations = numpy.frompyfunc(__log_combinations_inner, 2, 1)


def __marginal_prob(blocksize, n_words):

    def marginal_prob(n, m):
        """Marginal probability of a word that occurs n times in the document
           occurring m times in a given block"""

        return numpy.exp(
            __log_combinations(n, m)
            + __log_combinations(n_words - n, blocksize - m)
            - __log_combinations(n_words, blocksize)
        )

    return numpy.frompyfunc(marginal_prob, 2, 1)
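
As background (not part of the original snippet), __log_combinations_inner relies on the identity linking binomial coefficients to the Beta function, which is why scipy.special.betaln appears: since B(n-m+1, m+1) = (n-m)! m! / (n+1)!, it follows that

    log C(n, m) = -( log(n + 1) + log B(n - m + 1, m + 1) )

which is exactly the expression returned.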
Example No. 40
 def _object_dtype_isnan(X):
     return np.frompyfunc(lambda x: x != x, 1, 1)(X).astype(bool)
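
A minimal usage sketch (the input array below is illustrative): element-wise x != x is True only for NaN, which makes this helper work on object-dtype data where np.isnan would raise.

import numpy as np

X = np.array([["a", np.nan], [1.5, None]], dtype=object)
print(_object_dtype_isnan(X))
# [[False  True]
#  [False False]]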
Example No. 41
def BBcurve(data=None, T=5, n=20, alpha=0.4, resample=False, plot=True):
    """
    T = 5                        # sampling interval, in trading days
    n = 20                       # number of sampling points
    alpha = 0.4                  # the confidence level of falling inside the bull/bear price band is (1 - alpha)
    resample=True means the curve is computed every T days; otherwise it is computed every day
    """
    if data is None:
        data = getShData()

    a = ss.t.ppf(1 - alpha / 2, n)  # upper bound of the interval at confidence level (1 - alpha)

    close = data.close
    t = close.index.values[n * T:]  # the first bull/bear value is only available from trading day n*T onwards
    # series of daily log returns
    logreturns = np.diff(np.log(close))
    # compute mu
    _mean = lambda i: np.mean(logreturns[i - n * T + 1:i + 1])  # calculate range[0,n*T] as mean[0]
    _mean = np.frompyfunc(_mean, 1, 1)
    mu = _mean(np.arange(n * T - 1, len(logreturns)))  # len(data)-n*T 's mu

    # compute sigma
    _var = lambda i: np.var(logreturns[i - n * T + 1:i + 1])  # len(data)-n*T 's sigma
    _var = np.frompyfunc(_var, 1, 1)
    sigma = _var(np.arange(n * T - 1, len(logreturns)))
    sigma = np.sqrt(sigma.astype(float))

    if resample:
        # resample close, mu and sigma synchronously
        t_T = close.index.values[n * T::T]  # from n*T
        close_T = close.values[n * T::T]
        mu_T = mu[::T]
        sigma_T = sigma[::T]

        bull_c = np.log(close_T) + T * mu_T + np.sqrt(T) * sigma_T * a  # starts from n*T (the list is counted from 0)
        bull_c = np.exp(bull_c.astype(float))  # this computes the bull value for day n*T + T
        # it uses the close at n*T and the mu computed over [0, n*T) (which does not contain n*T)
        bear_c = np.log(close_T) + T * mu_T - np.sqrt(T) * sigma_T * a
        bear_c = np.exp(bear_c.astype(float))

        bb_c = np.log(close_T) + T * mu_T
        bb_c = np.exp(bb_c.astype(float))
        if t_T[-1] != t[-1]:  # the last value may be computed beyond day t[-1], but it can still be used
            t_T = np.append(t_T, t[-1:])
            bb = pd.DataFrame({'bull': bull_c, 'bear': bear_c, 'bb': bb_c}, index=t_T[1:])
        else:
            bb = pd.DataFrame({'bull': bull_c[:-1], 'bear': bear_c[:-1], 'bb': bb_c[:-1]}, index=t_T[1:])

        bb = bb.resample('1D').bfill()

    else:
        bull_c = np.log(close.values[n * T:]) + T * mu + np.sqrt(T) * sigma * a
        bull_c = np.exp(bull_c.astype(float))
        bear_c = np.log(close.values[n * T:]) + T * mu - np.sqrt(T) * sigma * a
        bear_c = np.exp(bear_c.astype(float))
        bb_c = np.log(close.values[n * T:]) + T * mu
        bb_c = np.exp(bb_c.astype(float))

        bb = pd.DataFrame({'bull': bull_c[:-T], 'bear': bear_c[:-T], 'bb': bb_c[:-T]}, index=t[T:])
        bb['means'] = bb['bull'].rolling(window=5, center=False).mean()
    plt.figure(figsize=(16, 8))
    plt.plot(t, close[t], 'k', label="$CLOSE$", linewidth=1)
    plt.plot(bb.index.values, bb.bull.values, color='r', label="$Bull$",
             linewidth=1)  # the first one is n*T+T,and the bull_c is for n*T+T
    plt.plot(bb.index.values, bb.bear.values, color='g', label="$Bear$", linewidth=1)
    # plt.plot(bb.index.values, bb.means.values, color='g', label="$BB$", linewidth=2)
    plt.title("Bull and Bear")
    plt.legend()
    plt.grid()
    if plot:
        plt.show()
    return bb
Example No. 42
def my_normalize(image):
    scaled_image = np.frompyfunc(lambda x: max(0, min(x, 255)), 1, 1)(image).astype(np.uint8)
    return scaled_image
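
As a side note (not part of the original snippet), the same clamping can be expressed without a Python-level ufunc; np.clip is vectorized in C and avoids the object-dtype intermediate:

import numpy as np

def my_normalize_clip(image):
    # hypothetical equivalent of my_normalize, using np.clip instead of frompyfunc
    return np.clip(image, 0, 255).astype(np.uint8)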
Example No. 43
def SetFont(ax, fig, fontsize=12, fontname='Arial', items=None):
    """Change font properties of all axes
        ax: which axis or axes to change the font of. Defaults to all axes in the
            current instance. To skip axes, pass [].
        fig: figure handle to change the font (text in the figure, not in an axis).
            Defaults to any text items in the current instance. To skip, pass [].
        fontsize: size of the font, specified in the global variable
        fontname: fullpath of the font, specified in the global variable
        items: select a list of items to change font. ['title', 'xlab','ylab',
               'xtick','ytick', 'texts','legend','legendtitle']
       
        """
    def get_ax_items(ax):
        """Parse axis items"""
        itemDict = {
            'title': [ax.title],
            'xlab': [ax.xaxis.label],
            'ylab': [ax.yaxis.label],
            'xtick':
            ax.get_xticklabels(),
            'ytick':
            ax.get_yticklabels(),
            'xminortick':
            ax.get_xminorticklabels(),
            'yminortick':
            ax.get_yminorticklabels(),
            'texts':
            ax.texts if isinstance(ax.texts,
                                   (np.ndarray, list)) else [ax.texts],
            'legend': [] if ax.legend_ is None else ax.legend_.get_texts(),
            'legendtitle':
            [] if ax.legend_ is None else [ax.legend_.get_title()]
        }
        itemList, keyList = [], []
        if items is None:  # get all items
            for k, v in iter(itemDict.items()):
                itemList += v
                keyList += [k] * len(v)
        else:  # get only specified item
            for k in items:
                itemList += itemDict[k]  # add only specified in items
                keyList += [k] * len(itemDict[k])

        return (itemList, keyList)

    def get_fig_items(fig):
        """Parse figure text items"""
        itemList = fig.texts if isinstance(fig.texts,(np.ndarray,list)) \
                                else [fig.texts]
        keyList = ['texts'] * len(itemList)

        return (itemList, keyList)

    def CF(itemList, keyList):
        """Change font given item"""
        # initialize fontprop object
        fontprop = fm.FontProperties(style='normal',
                                     weight='normal',
                                     stretch='normal')
        if os.path.isfile(fontname):  # check if font is a file
            fontprop.set_file(fontname)
        else:  # check if the name of font is available in the system
            if not any([
                    fontname.lower() in a.lower()
                    for a in fm.findSystemFonts(fontpaths=None, fontext='ttf')
            ]):
                print('Cannot find specified font: %s' % (fontname))
            fontprop.set_family(fontname)  # set font name
        # set font for each object
        for n, item in enumerate(itemList):
            if isinstance(fontsize, dict):
                if keyList[n] in fontsize.keys():
                    fontprop.set_size(fontsize[keyList[n]])
                else:
                    pass
                    # print('Warning font property {} not in specified fontsize. Font is kept at default.'.format(keyList[n]))
            elif n < 1:  # set the properties only once
                fontprop.set_size(fontsize)
            item.set_fontproperties(fontprop)  # change font for all items

    def CF_ax(ax):  # combine CF and get_ax_items
        if not ax:  # true when empty or None
            return  # skip axis font change
        itemList, keyList = get_ax_items(ax)
        CF(itemList, keyList)

    def CF_fig(fig):  # combine CF and get_fig_items
        if not fig:  # true when empty or None
            return  # skip figure font change
        itemsList, keyList = get_fig_items(fig)
        CF(itemsList, keyList)

    # vectorize the closure
    CF_ax_vec = np.frompyfunc(CF_ax, 1, 1)
    CF_fig_vec = np.frompyfunc(CF_fig, 1, 1)

    # Do the actual font change
    CF_ax_vec(ax)
    CF_fig_vec(fig)
def process_file(file_path):

    # load the .mat file, squeezing all the useless Matlab extra dimensions
    s = scipy.io.loadmat(file_path, squeeze_me=True)
    # TODO see whether there is a way to open .mat v7.3 files directly via hdf5.py

    minimum_frequency = s['starting_fft_frequency']  # 0 Hz
    maximum_frequency = 128  # TODO hardcoded # TODO could perhaps be obtained from the subsampling time # TODO 1/(2*s['subsampling_time']) # Nyquist_frequency = time_sampling_rate / 2
    frequency_interval = maximum_frequency - minimum_frequency
    fft_lenght = s['fft_lenght']  # t_FFT = 8192 seconds
    frequency_resolution = s['frequency_resolution']  # 1/t_FFT = 1/8192
    reduction_factor = s['reduction_factor']  # TODO understand how this is defined

    frequencies = numpy.linspace(start=minimum_frequency,
                                 stop=maximum_frequency,
                                 num=int(frequency_interval /
                                         frequency_resolution))
    # do NOT use numpy.arange(start=minimum_frequency, stop=maximum_frequency, step=frequency_resolution) because of half-open interval!
    subsampled_frequencies = numpy.linspace(
        start=minimum_frequency,
        stop=maximum_frequency,
        num=int(frequency_interval /
                (frequency_resolution *
                 reduction_factor)))  # TODO do this more elegantly

    number_of_zeros = s[
        'number_of_zeros']  # TODO check its correctness relative to percentage_of_zeros
    unilateral_number_of_samples = s['unilateral_number_of_samples']
    percentage_of_zeros = s[
        'percentage_of_zeros']  # TODO note that percentage_of_zeros is NOT equal to number_of_zeros/unilateral_number_of_samples (also because the ratio sometimes comes out > 1, which is absurd)

    # TODO 1/sqrt
    total_normalization = numpy.sqrt(2) * s['normalization_factor'] * s[
        'window_normalization'] * numpy.sqrt(1 - percentage_of_zeros)

    # scaling_factor = s['scaling_factor'] # arbitrary factor used sometimes to rescale the data

    # TODO ex memory error
    # TODO different values if the square of the normalization is applied outside
    power_spectrum = numpy.square(
        numpy.abs(s['fft_data'] * total_normalization))  #*scaling_factor
    # don't worry if an overflow RuntimeWarning will be printed by numpy: see below
    # TODO one-sided FFT?

    #total_normalization = total_normalization.astype(numpy.float64)
    #total_normalization = numpy.double(total_normalization)
    # float64 slows down computation and cannot be handled by GPU
    # so we are forced to take into account the possibility of overflow and truncation errors (RuntimeWarning: overflow)
    # replace the eventual infinities with the maximum float32 number
    power_spectrum[numpy.isinf(power_spectrum)] = numpy.finfo(
        numpy.float32).max  # float32_max = 3.4028235e+38

    # autoregressive_spectrum and periodogram are stored in the file as square roots, so we need to make the square of them
    autoregressive_spectrum = numpy.square(
        s['autoregressive_spectrum'])  #*scaling_factor
    periodogram = numpy.square(s['periodogram'])  #*scaling_factor

    # frequency band selection
    # the best/cleaner frequency interval is roughly the region between 80 Hz and 120 Hz
    # so, that will be the only band we'll analyze
    cleanest_frequency_band = numpy.logical_and(80 <= frequencies,
                                                frequencies <= 120)
    selected_power_spectrum = power_spectrum[cleanest_frequency_band]
    selected_frequencies = frequencies[cleanest_frequency_band]

    cleanest_subsampled_frequency_band = numpy.logical_and(
        80 <= subsampled_frequencies, subsampled_frequencies <= 120)
    selected_autoregressive_spectrum = autoregressive_spectrum[
        cleanest_subsampled_frequency_band]
    selected_periodogram = periodogram[cleanest_subsampled_frequency_band]
    selected_subsampled_frequencies = subsampled_frequencies[
        cleanest_subsampled_frequency_band]

    # until now, we have filtered and selected frequencies. so it was useful to have the main axis of the matrices on the dimension "frequency"
    # from here on, we will need to iterate over "time". so it's useful to transpose everything
    power_spectrum = numpy.transpose(power_spectrum)
    autoregressive_spectrum = numpy.transpose(autoregressive_spectrum)
    periodogram = numpy.transpose(periodogram)
    selected_power_spectrum = numpy.transpose(selected_power_spectrum)
    selected_autoregressive_spectrum = numpy.transpose(
        selected_autoregressive_spectrum)
    selected_periodogram = numpy.transpose(selected_periodogram)

    # all the following filter selections are evaluated in the interesting frequency band only (from 80 to 120 Hz)
    # in this way we do not waste good samples that have minor problems outside the region of interest

    # all-empty FFTs are immediately discarded
    is_empty = numpy.all(selected_power_spectrum == 0,
                         axis=1)  # all zeros in the frequency (Fourier) domain
    is_not_empty = numpy.logical_not(is_empty)

    has_not_many_temporal_holes = percentage_of_zeros < 0.2  # less than 20% zeros in the time domain
    # TODO check how necessary percentage_of_zeros still is once the other noise filters are applied
    # TODO check whether the positions where the various zeros were inserted are written somewhere in their files

    # TODO make a histogram of this to establish a data-driven threshold

    #goods = [16, 17, 18, 19, 20, 21, 63, 64, 75, 76, 77, 82, 83, 94]

    # TODO percentage_of_zeros[goods]
    # TODO the condition on the percentage difference between the autoregressive spectrum and the periodogram seems to me the most solid and general one, future-proof and able to account for improvements in the noise over time.
    # TODO it is NOT enough, though: some spurious spectra remain
    # TODO there is always a difference on the peaks, however, so it might be advisable to do the whitening first

    # given the fact that our current data are really dirty, we place a condition on the median of the autoregressive spectrum, to be sure that it lies in the correct range # TODO remove this constraint when the data get better # TODO then divide by sps with 128
    # the periodogram can be higher than the autoregressive spectrum, because it suffers when there are bumps and unwanted impulses in the time domain
    # the median is more robust than the average
    autoregressive_spectrum_median = numpy.median(
        selected_autoregressive_spectrum, axis=1)
    #absolute_tolerance = 1e-7 # TODO fine tuned (following the results of the by-eye evaluation) (it would be better to use a relative difference, to be more future-proof)
    #is_in_the_usual_range = numpy.isclose(autoregressive_spectrum_median, 6.5e-7, atol=absolute_tolerance) # (6.5 ± 1) * 10^-7
    #is_out_of_usual_range = numpy.logical_not(is_in_the_usual_range)
    #is_empty_or_unusual = numpy.logical_or(is_empty, is_out_of_usual_range)
    # TODO do this with numpy.any()

    # autoregressive_spectrum and periodogram must be more or less the same in this flat area
    # they are different in the peaks, because by construction the autoregressive mean ignores them
    # the autoregressive_spectrum can follow the noise nonstationarities
    periodogram_median = numpy.median(selected_periodogram, axis=1)
    #median_difference = autoregressive_spectrum_median - periodogram_median
    #has_discrepancies = numpy.abs(median_difference) >= 1e-5 # max_difference = 10^-5 # TODO fine tuned (it would be better to use a relative difference, to be more future-proof)

    #is_flagged = numpy.logical_or(is_empty_or_unusual, has_discrepancies) # is_empty | is_out_of_usual_range | has_discrepancies
    #is_science_ready = numpy.logical_not(is_flagged)
    # TODO do this with numpy.any

    # TODO the baseline noise level is different for all 3 detectors
    spectra_relative_difference = numpy.abs(
        relative_difference(
            periodogram_median,
            autoregressive_spectrum_median))  # some values are NaN
    is_consistent = spectra_relative_difference < 0.1  # numpy.nan < 0.1 evaluates to False, so everything is ok
    #is_consistent = numpy.isclose(periodogram_median, autoregressive_spectrum_median, rtol=0.1) # pay attention: numpy.isclose returns True if the two spectra are empty (all zeros), while the previous method uses the NaN while dividing by zero and correctly classify the spectra as not consistent
    # relative_tolerance = 10% # TODO fine tuned threshold
    # TODO BUT the is_empty ones must be removed from it (because relative tolerance has problems with zeros) (is_close and not is_empty)
    # TODO new elementwise comparison: numpy.equal

    # the median is more resistant than the mean to the presence of strong outliers
    # median: 'middle' value
    # mode: most common value
    # extreme outliers change the values of mean and variance

    # the autoregressive_spectrum follows the data better than the periodogram does
    # the second quartile is the median of the data
    # interquartile_ratio/2 is the equivalent of sigma (the standard deviation) but at 50% instead of 68%
    goodness_constraints = numpy.all(
        [is_not_empty, has_not_many_temporal_holes, is_consistent], axis=0
    )  # check if all conditions are satisfied (like with logical and)

    # if there isn't any good FFT (that is: if all FFTs are bad)
    # (this 'if' statement is required because in the calculation of the median we cannot divide by zero)
    if numpy.all(goodness_constraints ==
                 False):  #if not numpy.any(goodness_constraints):
        is_science_ready = goodness_constraints  # all False
    else:
        # evaluating the middle_value makes sense only for the already-proven good FFTs (it's a fine-tuning selection). otherwise the middle_value will be influenced by the already-proven bad values
        middle_value = numpy.median(
            autoregressive_spectrum_median[goodness_constraints])
        is_in_the_usual_range = numpy.isclose(
            autoregressive_spectrum_median, middle_value,
            rtol=0.5)  # relative_tolerance = 50% # TODO fine tuned

        #numpy.all([is_consistent, is_not_empty], axis=0)
        is_science_ready = numpy.logical_and(goodness_constraints,
                                             is_in_the_usual_range)
    is_flagged = numpy.logical_not(is_science_ready)

    detector = s['detector']

    # TODO plot the histograms (maybe 2D over the 2 detectors) to find the optimal cuts

    #
    #    spectra_relative_difference[numpy.isnan(spectra_relative_difference)] = 10 # TODO dummy value
    #
    #    a = spectra_relative_difference # less than 0.1
    #    values_relative_difference = numpy.abs(relative_difference(autoregressive_spectrum_median, middle_value))
    #    b = values_relative_difference # less than 0.5
    #    is_science_ready
    #

    #    # for a given detector
    #    a = spectra_relative_difference[is_not_empty]
    #    b = autoregressive_spectrum_median[is_not_empty]
    #    pyplot.figure(figsize=[10,10])
    #    #pyplot.hist2d(a,b, bins=100, range=[[0, 1],[0,1]], cmap='gray_r')
    #    #pyplot.scatter(a,b)
    #    #pyplot.xscale('log')
    #    #pyplot.yscale('log')
    #    pyplot.loglog(a,b, marker='o', linestyle='None')
    #    pyplot.title('spectra selection ')
    #    pyplot.xlabel('abs(relative_difference(median(periodogram), median(autoregressive_spectrum)))')
    #    pyplot.ylabel('abs(relative_difference(median(autoregressive_spectrum), middle_value))')
    #    #pyplot.savefig('/storage/users/Muciaccia/media/spectra_selection.jpg')
    #    pyplot.show()
    #    #pyplot.close()

    #    noise_characterization['spectra_relative_difference'].append(spectra_relative_difference[is_not_empty])
    #    #noise_characterization['values_relative_difference'] = []
    #    noise_characterization['autoregressive_spectrum_median'].append(autoregressive_spectrum_median[is_not_empty])
    #    #noise_characterization['is_not_empty'].append(is_not_empty)
    #    noise_characterization['percentage_of_zeros'].append(percentage_of_zeros[is_not_empty])
    #    noise_characterization['is_science_ready'].append(is_science_ready[is_not_empty])
    #    noise_characterization['detector'].append(detector)

    #    pyplot.hist(spectra_relative_difference[is_not_empty], bins=100, range=[0,1]);pyplot.show()
    #    a = relative_difference(periodogram, autoregressive_spectrum) # without numpy.abs(...)
    #    b = a[is_not_empty]
    #    c = numpy.median(b, axis=1)
    #    pyplot.hist(c, bins=100, range=[-1,1]);pyplot.show()

    #    # TODO we need to understand exactly how the periodogram and the autoregressive spectrum are computed, because there are often spectra that look perfectly fine to me but show an appreciable difference between the two curves: I would not want to be needlessly throwing away a lot of good data this way (the difference between the two curves might also depend on the percentage of zeros)

    #clean_power_spectrum = power_spectrum[is_science_ready]
    #clean_autoregressive_spectrum = autoregressive_spectrum[is_science_ready]
    #clean_periodogram = periodogram[is_science_ready]

    #clean_selected_power_spectrum = selected_power_spectrum[is_science_ready]
    #clean_selected_autoregressive_spectrum = selected_autoregressive_spectrum[is_science_ready]
    #clean_selected_periodogram = selected_periodogram[is_science_ready]

    #    if detector == 'Virgo': # TODO temporary hack for the VSR4 dataset
    #        desired_gps_start_time = astropy.time.Time(val='2017-01-01 00:00:00.000', format='iso', scale='utc').gps
    #        actual_gps_start_time = astropy.time.Time(val='2011-06-03 10:26:59.000', format='iso', scale='utc').gps
    #        gps_time_shift = desired_gps_start_time - actual_gps_start_time
    #        s['gps_time'] = s['gps_time'] + gps_time_shift # TODO there is a residual difference of 18 seconds here

    # TODO float32 is not enough to represent the GPS time with the required precision, because it loses the last two digits of the GPS time (tens and units)
    # TODO since the computation will have to run on GPU and thus float32 is needed, I propose redefining the GPS time standard to start from January 1st 2000 instead of January 6th 1980, calling it millennium_time
    # TODO see how the time series in pandas and xarray solve this problem
    # TODO data/values indexes labels/coordinates axis/dimensions
    # TODO pandas.CategoricalIndex pandas.IndexSlice pandas.IntervalIndex pandas.MultiIndex pandas.SparseArray pandas.TimedeltaIndex

    gps_time = astropy.time.Time(val=s['gps_time'], format='gps', scale='utc')
    gps_time_values = gps_time.value.astype(numpy.float64)  # TODO

    # ISO 8601 compliant date-time format: YYYY-MM-DD HH:MM:SS.sss
    iso_time_values = gps_time.iso
    # time of the first FFT of this file
    human_readable_start_time = iso_time_values[0]

    #clean_iso_time_values = iso_time_values[is_science_ready]

    fft_index = s['fft_index'] - 1  # indices in Python start from 0, instead of 1 as in Matlab
    print('Processing', file_path)
    print('Good spectra:', len(fft_index[is_science_ready]), 'out of',
          len(fft_index))

    # TODO check that the mean value on the plateau (10^-6) is consistent with what is written in my thesis

    #    hist_H = []
    #    hist_L = []
    #    hist_V = []
    #    if detector == 'LIGO Hanford':
    #        hist_H.append(numpy.log(autoregressive_spectrum_median))
    #    if detector == 'LIGO Livingston':
    #        hist_L.append(numpy.log(autoregressive_spectrum_median))
    #    pyplot.hist2d(x=H, y=L, bins=100, cmap='viridis')
    #    #pyplot.hist(numpy.log(numpy.median(clean_selected_periodogram, axis=1)), bins=100)
    #    #pyplot.show()
    #    # TODO make 2D histograms to demonstrate the quality of the separation surfaces and of the cuts
    #    # TODO add a legend for the color image, with the additive-synthesis color circle in SVG (on a square black-background image) and the letters identifying the detectors (do some tests with an ad hoc image). check that where all the data are missing the vertical stripes are black (due to zeros) and not white (due to NaNs)
    #    # TODO restrict the frequency window of the images to 80-120 Hz only, instead of the whole 128 Hz band (and possibly recompute the 0.1 Hz interval)
    #    # TODO move the image creation to xarray, so that the computation can be done without memory constraints on any computer
    #    # TODO look into out-of-memory computation for big data on tensorflow
    #    # TODO postpone the logarithmic normalization until after the injections are made
    #    # TODO classify the spectrum images to show all the possible cases (including the one with temporal zeros, by inverse Fourier transforming)
    #    # TODO make k-fold validation plots with numpy.percentile([5, 25, 50, 75, 95], data) so as to have the median line and the 50% and 90% confidence lines, as astrophysicists (sensibly, I now understand) do. [5, 25, 50, 75, 95] = [50-90/2, 50-50/2, 50, 50+50/2, 50+90/2]
    #    # TODO then, after the classification (trigger), do regression for parameter estimation
    #    # TODO Ricci wants tools to study the noise online (during the commissioning phase of the interferometer)
    #    # TODO ask Ornella to generate the most recent data
    #    # TODO add the Virgo data from VSR4 (or the latest ones from O2)
    #    # TODO consider creating the .netCDF4 files directly in Matlab, to save the space taken by the .mat files
    #    img = numpy.zeros([100, 100, 3])
    #    img[19:79,0:59,0] = 1
    #    img[39:99,19:79,1] = 1
    #    img[0:59,39:99,2] = 1
    #    # img[0:59,0:59,0] = 1
    #    # img[19:79,19:79,1] = 1
    #    # img[39:99,39:99,2] = 1
    #    pyplot.imshow(img, origin="lower", interpolation="none")
    #    pyplot.show()

    plot_it = False

    if plot_it:
        for spectrum in selected_power_spectrum[
                is_science_ready]:  # clean_selected_power_spectrum
            pyplot.figure()
            pyplot.grid()
            pyplot.semilogy(selected_frequencies, spectrum)
            #pyplot.savefig('{}.svg'.format(i))
            pyplot.show()
            pyplot.close()

    plot_it = False

    if plot_it:
        #for i in range(len(fft_index[is_science_ready])): # TODO iterate directly over fft_index using xarray
        #@numpy.vectorize # TODO BUG: it repeats the first element
        def my_plot_figure(i):
            print(i)
            fig, [total, zoom] = pyplot.subplots(nrows=2,
                                                 ncols=1,
                                                 figsize=[10, 10])
            #fig.suptitle(...)
            total.grid()
            zoom.grid()
            total.semilogy(frequencies,
                           power_spectrum[i],
                           label='Normalized Power FFT')
            total.semilogy(subsampled_frequencies,
                           autoregressive_spectrum[i],
                           color='#cc0000',
                           label='autoregressive spectrum')
            total.semilogy(subsampled_frequencies,
                           periodogram[i],
                           color='black',
                           label='periodogram')

            # draw a rectangle to highlight the zoomed part # TODO zorder
            total.add_patch(
                matplotlib.patches.Rectangle(xy=[80, 1e-12],
                                             width=120 - 80,
                                             height=1e-2 - 1e-12,
                                             fill=False,
                                             alpha=1.0,
                                             linewidth=3,
                                             edgecolor="darkgrey"))

            zoom.semilogy(selected_frequencies,
                          selected_power_spectrum[i],
                          label='Normalized Power FFT')
            zoom.semilogy(selected_subsampled_frequencies,
                          selected_autoregressive_spectrum[i],
                          color='#cc0000',
                          label='autoregressive spectrum')
            zoom.semilogy(selected_subsampled_frequencies,
                          selected_periodogram[i],
                          color='black',
                          label='periodogram')
            total.set_xlabel('Frequency [Hz]')
            zoom.set_xlabel('Frequency [Hz]')
            # TODO total.set_xlabel(...) # TODO amplitude spectral density VS strain VS 1/sqrt(Hz) VS 1/Hz
            # TODO zoom.set_xlabel(...)
            total.set_title('{} O2 C01 {} (0 Hz - 128 Hz)'.format(
                detector, iso_time_values[i]),
                            size=16)  # TODO hardcoded
            #zoom.set_title('Zoomed spectrum: (80 Hz - 120 Hz)') # TODO
            # TODO set the x limits from 0 to 128 and make them coincide with the figure border
            total.legend(loc='upper right')
            #pyplot.show()
            pyplot.savefig(
                '/storage/users/Muciaccia/media/spectra selection/{}.jpg'.
                format(i))
            pyplot.close()

        my_plot_figure = numpy.frompyfunc(my_plot_figure, 1,
                                          1)  # TODO hack to vectorize it
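        # (added note) the ufunc wrapper calls my_plot_figure once per selected FFT index and
        # returns an object array of None values; the plots are produced purely as a side effect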
        #print(fft_index[is_science_ready])
        my_plot_figure(fft_index[is_science_ready])
        #my_plot_figure(63)
        # good_discarded = 97? // 48 95
        # bad_selected = 66? //
        # so these selection criteria can introduce 1% or more of false positives and false negatives
        # TODO optimize the cuts (or let the machine learn them directly)
        # TODO plot the false positives and the false negatives
        # TODO also plot the data with holes in time, to illustrate that selection criterion as well

    # TODO numpy BUG: the first index gets repeated
    #@numpy.vectorize
    #def plot_figure(fft_index): # TODO this way it should be vectorized
    #    print(fft_index)
    #plot_array = numpy.frompyfunc(plot_figure, 1,1)

    # TODO python BUG
    # function definitions should be instances of a function class (as in javascript?), which should be replaceable with other, more refined classes such as vectorized_function (numpy ufunc)

    # set all the flagged values to zero
    power_spectrum[is_flagged] = 0
    selected_power_spectrum[is_flagged] = 0
    autoregressive_spectrum[is_flagged] = 0
    selected_autoregressive_spectrum[is_flagged] = 0
    periodogram[is_flagged] = 0
    selected_periodogram[is_flagged] = 0

    # expand each value of the selected_autoregressive_spectrum 128 times
    # these values will be later used for the whitening of the data
    expander = numpy.ones(128, dtype=numpy.float32)  # TODO hardcoded
    expanded_selected_autoregressive_spectrum = numpy.einsum(
        'ij,k', selected_autoregressive_spectrum,
        expander).reshape(len(selected_autoregressive_spectrum), -1)
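    # (added note) this is equivalent to numpy.repeat(selected_autoregressive_spectrum, 128, axis=1):
    # the einsum builds an (n_fft, n_bins, 128) outer product with the vector of ones,
    # which the reshape then flattens along the last two axes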

    selected_whitened_power_spectrum = selected_power_spectrum / expanded_selected_autoregressive_spectrum
    # exclude nan (=0/0) and inf (=x/0)
    excluded = numpy.logical_not(
        numpy.isfinite(selected_whitened_power_spectrum))
    selected_whitened_power_spectrum[excluded] = 0
    # NOTE: the whitened values are by definition always greater than 0, so the logarithm can safely be taken. therefore the histogram is NOT a Gaussian centered at zero, as I expected. the central value of the ratio is by definition 1, so the central value of the logarithm is 0. the histogram of the logarithms is an asymmetric bell-shaped curve, roughly centered at 0. taking the logarithm of the ratio is the same as subtracting the two logarithms (just like subtracting the two curves in the semilogy plot), BUT the first method must be used because the signal will have to be injected BEFORE taking the logarithm
    # white noise = flat in frequency
    # asymmetric 'sigma' =? 68% 50% quartile width_at_half_maximum
    # TODO are the log data in units of sigma? shouldn't the whitening turn out Gaussian? how are confidence values at 2.5 sigma assigned if the distribution is asymmetric? do we also need to divide by 8192/2? since the whitening operation is a division in Fourier space, can it be mapped to a convolution in real space? how is the autoregressive spectrum built? is it also the result of a convolution? "The resulting time series is no longer in units of strain; now in units of 'sigmas' away from the mean"

    # create a unitary structure to return

    selected_frequencies = numpy.single(selected_frequencies)  # float32
    #gps_time_values = numpy.double(gps_time_values) # float64 (this precision is absolutely necessary to avoid large truncation errors, on the order of tens of seconds)
    #datetimes = pandas.date_range(start=human_readable_start_time, freq='4096s', tz='UTC', periods=100) # 8192/2 = 4096
    datetimes = pandas.to_datetime(
        iso_time_values
    )  # TODO BUG: utc=True does not work when importing everything into xarray (maybe because the dtype='datetime64[ns, UTC]' is not supported)
    # dataset.time.dt.day
    # dataset.time.dt.dayofyear
    # http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    # netcdftime.DateFromJulianDay

    coordinate_names = ['frequency', 'time', 'detector'
                        ]  # 3 detectors, so we are preparing an RGB image
    coordinate_values = [selected_frequencies, datetimes, [detector]]
    attributes = {
        'FFT_lenght': fft_lenght,
        'observing_run': 'O2',  # TODO hardcoded (extract it from the file path)
        'calibration': 'C01',  # TODO hardcoded
        'maximum_frequency':
        maximum_frequency,  # TODO hardcoded (Nyquist_frequency = time_sampling_rate/2)
        # TODO also add time_sampling_rate = 256 Hz (an important parameter)
        'start_ISO_time': human_readable_start_time
    }  # TODO set it as an attribute of the single spectrogram (and remove it from the overall file)
    # TODO also add all the other interesting attributes, such as 'FFT_interlaced' = 1 # TODO BUG: it does not accept True (boolean)

    # TODO to avoid repetition, first define the coordinate DataArrays and then use them in the constructors of all the other variables

    spectrogram = xarray.DataArray(
        data=numpy.expand_dims(numpy.transpose(selected_power_spectrum),
                               axis=-1),
        dims=coordinate_names,
        coords=coordinate_values)  #, attrs=attributes) #name='image'
    whitened_spectrogram = xarray.DataArray(data=numpy.expand_dims(
        numpy.transpose(selected_whitened_power_spectrum), axis=-1),
                                            dims=coordinate_names,
                                            coords=coordinate_values)
    locally_science_ready = xarray.DataArray(
        data=numpy.expand_dims(is_science_ready, axis=-1),
        dims=['time', 'detector'],
        coords=[datetimes, [detector]])  # TODO [detector] VS detector

    dataset = xarray.Dataset(data_vars={
        'spectrogram': spectrogram,
        'whitened_spectrogram': whitened_spectrogram,
        'locally_science_ready': locally_science_ready
    },
                             attrs=attributes)

    return dataset
Esempio n. 45
0
import matplotlib.pyplot as plt


def sierpinski(x, y):
    v = 2 / np.sqrt(3) * y
    u = x - 0.5 * v
    return sdf(u, v, 0)


def sdf(u, v, depth):
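    # (added note) recursively tests whether the point (u, v), in the skewed coordinates
    # produced by sierpinski(), belongs to the Sierpinski triangle: 0 = inside (after 10
    # subdivisions), 1 = outside; each recursion zooms into one of the three corner sub-triangles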
    if depth == 10:
        return 0
    if u < 0 or v < 0 or u + v > 1:
        return 1
    if u > 0.5:
        return sdf(2 * u - 1, 2 * v, depth + 1)
    if v > 0.5:
        return sdf(2 * u, 2 * v - 1, depth + 1)
    if u + v < 0.5:
        return sdf(2 * u, 2 * v, depth + 1)
    return 1


plt.figure(figsize=(10, 10))
x0, x1, y0, y1 = 0, 1, 0, 1
y, x = np.ogrid[y0:y1:1000j, x0:x1:1000j]
img = np.frompyfunc(sierpinski, 2, 1)(x, y).astype(float)  # np.float was removed from NumPy; use the builtin float
plt.imshow(img, extent=[x0, x1, y0, y1])
plt.gca().axis('off')
plt.show()
Esempio n. 46
0
 def update_pos(self, func):
     ufunc = np.frompyfunc(func, 3, 1)
     self.pvec = ufunc(self.vvec, self.vvec_old, self.pvec)
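     # (added note) the ufunc applies func elementwise across vvec, vvec_old and pvec,
     # broadcasting the three arrays together and returning an object-dtype array of results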
     self.vvec_old = np.copy(self.vvec)
Esempio n. 47
0
def main(args):
    args = docopt(__doc__, args)
    args['--cache-region'] = args['--cache-region'].lower()
    args['--ei-alg'] = args['--ei-alg'].lower()
    assert args['--ei-alg'] in ('ei_greedy', 'deim')
    args['--grid'] = int(args['--grid'])
    args['--grid-type'] = args['--grid-type'].lower()
    assert args['--grid-type'] in ('rect', 'tria')
    args['--initial-data'] = args['--initial-data'].lower()
    assert args['--initial-data'] in ('sin', 'bump')
    args['--lxf-lambda'] = float(args['--lxf-lambda'])
    args['--nt'] = int(args['--nt'])
    args['--not-periodic'] = bool(args['--not-periodic'])
    args['--num-flux'] = args['--num-flux'].lower()
    assert args['--num-flux'] in ('lax_friedrichs', 'engquist_osher')
    args['--plot-error-landscape-N'] = int(args['--plot-error-landscape-N'])
    args['--plot-error-landscape-M'] = int(args['--plot-error-landscape-M'])
    args['--test'] = int(args['--test'])
    args['--vx'] = float(args['--vx'])
    args['--vy'] = float(args['--vy'])
    args['--ipython-engines'] = int(args['--ipython-engines'])
    args['EXP_MIN'] = int(args['EXP_MIN'])
    args['EXP_MAX'] = int(args['EXP_MAX'])
    args['EI_SNAPSHOTS'] = int(args['EI_SNAPSHOTS'])
    args['EISIZE'] = int(args['EISIZE'])
    args['SNAPSHOTS'] = int(args['SNAPSHOTS'])
    args['RBSIZE'] = int(args['RBSIZE'])

    print('Setup Problem ...')
    problem = burgers_problem_2d(vx=args['--vx'],
                                 vy=args['--vy'],
                                 initial_data_type=args['--initial-data'],
                                 parameter_range=(args['EXP_MIN'],
                                                  args['EXP_MAX']),
                                 torus=not args['--not-periodic'])

    print('Discretize ...')
    if args['--grid-type'] == 'rect':
        args['--grid'] *= 1. / math.sqrt(2)
    fom, _ = discretize_instationary_fv(
        problem,
        diameter=1. / args['--grid'],
        grid_type=RectGrid if args['--grid-type'] == 'rect' else TriaGrid,
        num_flux=args['--num-flux'],
        lxf_lambda=args['--lxf-lambda'],
        nt=args['--nt'])

    if args['--cache-region'] != 'none':
        fom.enable_caching(args['--cache-region'])

    print(fom.operator.grid)

    print(f'The parameter type is {fom.parameter_type}')

    if args['--plot-solutions']:
        print('Showing some solutions')
        Us = ()
        legend = ()
        for mu in fom.parameter_space.sample_uniformly(4):
            print(f"Solving for exponent = {mu['exponent']} ... ")
            sys.stdout.flush()
            Us = Us + (fom.solve(mu), )
            legend = legend + (f"exponent: {mu['exponent']}", )
        fom.visualize(Us,
                      legend=legend,
                      title='Detailed Solutions',
                      block=True)

    pool = new_parallel_pool(ipython_num_engines=args['--ipython-engines'],
                             ipython_profile=args['--ipython-profile'])
    eim, ei_data = interpolate_operators(
        fom,
        ['operator'],
        fom.parameter_space.sample_uniformly(args['EI_SNAPSHOTS']),  # NOQA
        error_norm=fom.l2_norm,
        product=fom.l2_product,
        max_interpolation_dofs=args['EISIZE'],
        alg=args['--ei-alg'],
        pool=pool)

    if args['--plot-ei-err']:
        print('Showing some EI errors')
        ERRs = ()
        legend = ()
        for mu in fom.parameter_space.sample_randomly(2):
            print(f"Solving for exponent = \n{mu['exponent']} ... ")
            sys.stdout.flush()
            U = fom.solve(mu)
            U_EI = eim.solve(mu)
            ERR = U - U_EI
            ERRs = ERRs + (ERR, )
            legend = legend + (f"exponent: {mu['exponent']}", )
            print(f'Error: {np.max(fom.l2_norm(ERR))}')
        fom.visualize(ERRs,
                      legend=legend,
                      title='EI Errors',
                      separate_colorbars=True)

        print('Showing interpolation DOFs ...')
        U = np.zeros(U.dim)
        dofs = eim.operator.interpolation_dofs
        U[dofs] = np.arange(1, len(dofs) + 1)
        U[eim.operator.source_dofs] += int(len(dofs) / 2)
        fom.visualize(fom.solution_space.make_array(U),
                      title='Interpolation DOFs')

    print('RB generation ...')

    reductor = InstationaryRBReductor(eim)

    greedy_data = rb_greedy(fom,
                            reductor,
                            fom.parameter_space.sample_uniformly(
                                args['SNAPSHOTS']),
                            use_estimator=False,
                            error_norm=lambda U: np.max(fom.l2_norm(U)),
                            extension_params={'method': 'pod'},
                            max_extensions=args['RBSIZE'],
                            pool=pool)

    rom = greedy_data['rom']

    print('\nSearching for maximum error on random snapshots ...')

    tic = time.time()

    mus = fom.parameter_space.sample_randomly(args['--test'])

    def error_analysis(N, M):
        print(f'N = {N}, M = {M}: ', end='')
        rom = reductor.reduce(N)
        rom = rom.with_(operator=rom.operator.with_cb_dim(M))
        l2_err_max = -1
        mumax = None
        for mu in mus:
            print('.', end='')
            sys.stdout.flush()
            u = rom.solve(mu)
            URB = reductor.reconstruct(u)
            U = fom.solve(mu)
            l2_err = np.max(fom.l2_norm(U - URB))
            l2_err = np.inf if not np.isfinite(l2_err) else l2_err
            if l2_err > l2_err_max:
                l2_err_max = l2_err
                mumax = mu
        print()
        return l2_err_max, mumax

    error_analysis = np.frompyfunc(error_analysis, 2, 2)

    real_rb_size = len(reductor.bases['RB'])
    real_cb_size = len(ei_data['basis'])
    if args['--plot-error-landscape']:
        N_count = min(real_rb_size - 1, args['--plot-error-landscape-N'])
        M_count = min(real_cb_size - 1, args['--plot-error-landscape-M'])
        Ns = np.linspace(1, real_rb_size, N_count).astype(int)
        Ms = np.linspace(1, real_cb_size, M_count).astype(int)
    else:
        Ns = np.array([real_rb_size])
        Ms = np.array([real_cb_size])

    N_grid, M_grid = np.meshgrid(Ns, Ms)

    errs, err_mus = error_analysis(N_grid, M_grid)
    errs = errs.astype(float)

    l2_err_max = errs[-1, -1]
    mumax = err_mus[-1, -1]
    toc = time.time()
    t_est = toc - tic

    print('''
    *** RESULTS ***

    Problem:
       parameter range:                    ({args[EXP_MIN]}, {args[EXP_MAX]})
       h:                                  sqrt(2)/{args[--grid]}
       grid-type:                          {args[--grid-type]}
       initial-data:                       {args[--initial-data]}
       lxf-lambda:                         {args[--lxf-lambda]}
       nt:                                 {args[--nt]}
       not-periodic:                       {args[--not-periodic]}
       num-flux:                           {args[--num-flux]}
       (vx, vy):                           ({args[--vx]}, {args[--vy]})

    Greedy basis generation:
       number of ei-snapshots:             {args[EI_SNAPSHOTS]}
       prescribed collateral basis size:   {args[EISIZE]}
       actual collateral basis size:       {real_cb_size}
       number of snapshots:                {args[SNAPSHOTS]}
       prescribed basis size:              {args[RBSIZE]}
       actual basis size:                  {real_rb_size}
       elapsed time:                       {greedy_data[time]}

    Stochastic error estimation:
       number of samples:                  {args[--test]}
       maximal L2-error:                   {l2_err_max}  (mu = {mumax})
       elapsed time:                       {t_est}
    '''.format(**locals()))

    sys.stdout.flush()
    if args['--plot-error-landscape']:
        import matplotlib.pyplot as plt
        import mpl_toolkits.mplot3d  # NOQA
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        # we have to rescale the errors since matplotlib does not support logarithmic scales on 3d plots
        # https://github.com/matplotlib/matplotlib/issues/209
        surf = ax.plot_surface(M_grid,
                               N_grid,
                               np.log(np.minimum(errs, 1)) / np.log(10),
                               rstride=1,
                               cstride=1,
                               cmap='jet')
        plt.show()
    if args['--plot-err']:
        U = fom.solve(mumax)
        URB = reductor.reconstruct(rom.solve(mumax))
        fom.visualize(
            (U, URB, U - URB),
            legend=('Detailed Solution', 'Reduced Solution', 'Error'),
            title='Maximum Error Solution',
            separate_colorbars=True)

    return ei_data, greedy_data
Esempio n. 48
0
n_grams = list(
    everygrams(text_split_list, min_len=1,
               max_len=7))  # invert to iterate from 7-grams to 1-grams

n_grams_freq = FreqDist(n_grams)

n_grams = sorted(
    set(n_grams), key=len, reverse=True
)  # remove duplicates of n_grams using set, and sorting once again from 7-grams to 1-grams

word_count = len(text_split_list)

seq = dict()

# Returns a universal function (ufunc) object, add broadcasting to a python function, faster than applying a for loop or a map
get_size = np.frompyfunc(len, 1, 1)
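# (added note) get_size returns the length of each n-gram as an object-dtype array,
# so `get_size(n_grams) < 2` below is an elementwise test that locates the 1-grams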

one_gram_index = np.argmax(
    get_size(n_grams) < 2
)  # for n_gram with n > 2, because the cohesion is not calculated for n = 1, so get the index where the 1-grams start and exclude those from our searching space

mwu = dict()  # relevant_expresions (multi-word units)

# save methods inside variables saves the time calling function each time we need it
get_entry = seq.get
cohesion_gram = cohesion_measures
update = mwu.update

# some punctuation signs are for spanish language, like ¿? ¡!
re_punctuation = re.compile(
    "[;:!?<>&\(\)\[\]\"\.,=/\\\^\$\*\+\|\{\}\%\'\’\-\“\”\—\–\§\¿?¡!]")
Esempio n. 49
0
    def add_func(self, src_dict, key, value):

        func = np.frompyfunc(self.add, 3, 1)
        return func(src_dict, key, value)
Esempio n. 50
0
def main(ulog_file):

    log_file = pyulog.ULog(ulog_file)

    create_plots(log_file, 'vehicle_global_position',
                 ['alt', 'pressure_alt', 'terrain_alt'])
    create_plots(
        log_file, 'vehicle_command',
        ['param1', 'param2', 'param3', 'param4', 'param5', 'param6', 'param7'])
    create_plots(log_file,
                 'vehicle_attitude', ['rollspeed', 'pitchspeed', 'yawspeed'],
                 file_name='vehicle_attitude_rates')
    create_plots(log_file, 'actuator_outputs', [
        'output[0]', 'output[1]', 'output[2]', 'output[3]', 'output[4]',
        'output[5]'
    ])

    # list of parameters recorded in the log
    print_available_parameters(log_file)

    # vehicle_attitude
    vehicle_attitude = log_file.get_dataset('vehicle_attitude')
    time_data = vehicle_attitude.data['timestamp']
    q0 = vehicle_attitude.data['q[0]']
    q1 = vehicle_attitude.data['q[1]']
    q2 = vehicle_attitude.data['q[2]']
    q3 = vehicle_attitude.data['q[3]']
    quaternion2euler_array = np.frompyfunc(quaternion2euler, 4, 3)
    roll, pitch, yaw = quaternion2euler_array(q0, q1, q2, q3)
    fig, axs = plt.subplots(3, 1, sharex='all')
    axs[0].set_title('vehicle_attitude')
    axs[0].plot(time_data, roll * __rad2deg__, drawstyle='steps-post')
    axs[0].set_ylabel('Roll')
    axs[0].grid(True)
    axs[1].plot(time_data, pitch * __rad2deg__, drawstyle='steps-post')
    axs[1].set_ylabel('Pitch')
    axs[1].grid(True)
    axs[2].plot(time_data, yaw * __rad2deg__, drawstyle='steps-post')
    axs[2].set_ylabel('Yaw')
    axs[2].grid(True)
    fig.tight_layout()
    fig.savefig(f"vehicle_attitude.pdf",
                dpi=None,
                facecolor='w',
                edgecolor='w',
                orientation='portrait',
                papertype='a4',
                format='pdf',
                transparent=False,
                bbox_inches=None,
                pad_inches=0.1,
                frameon=None,
                metadata=None)

    # VERTICAL AXIS
    actuator_outputs = log_file.get_dataset('actuator_outputs')
    time_data_outputs = actuator_outputs.data['timestamp']
    # motors 2 & 3
    left_motors = (actuator_outputs.data['output[1]'] +
                   actuator_outputs.data['output[2]']) / 2
    # motors 1 & 4
    right_motors = (actuator_outputs.data['output[0]'] +
                    actuator_outputs.data['output[3]']) / 2
    thrust = (actuator_outputs.data['output[1]'] +
              actuator_outputs.data['output[2]'] +
              actuator_outputs.data['output[0]'] +
              actuator_outputs.data['output[3]']) / 4
    fig, axs = plt.subplots(2, 1, sharex='all')
    axs[0].set_title('Vertical axis')
    axs[0].plot(time_data_outputs, thrust, drawstyle='steps-post')
    axs[0].set_ylabel('Thrust')
    axs[0].grid(True)
    vehicle_global_position = log_file.get_dataset('vehicle_global_position')
    time_global_position = vehicle_global_position.data['timestamp']
    alt = vehicle_global_position.data['alt']
    axs[1].plot(time_global_position, alt, drawstyle='steps-post')
    axs[1].set_ylabel('Altitude')
    axs[1].grid(True)
    fig.tight_layout()
    fig.savefig(f"vertical_axis.pdf",
                dpi=None,
                facecolor='w',
                edgecolor='w',
                orientation='portrait',
                papertype='a4',
                format='pdf',
                transparent=False,
                bbox_inches=None,
                pad_inches=0.1,
                frameon=None,
                metadata=None)

    # LATERAL AXIS
    fig, axs = plt.subplots(4, 1, sharex='all')
    axs[0].set_title('Lateral axis')
    axs[0].plot(time_data, roll * __rad2deg__, drawstyle='steps-post')
    axs[0].set_ylabel('Roll')
    axs[0].grid(True)
    axs[1].plot(time_data_outputs, left_motors, drawstyle='steps-post')
    axs[1].set_ylabel('Left motors')
    axs[1].grid(True)
    axs[2].plot(time_data_outputs, right_motors, drawstyle='steps-post')
    axs[2].set_ylabel('Right motors')
    axs[2].grid(True)
    axs[3].plot(time_data_outputs,
                left_motors - right_motors,
                drawstyle='steps-post')
    axs[3].grid(True)
    fig.tight_layout()
    fig.savefig(f"lateral_axis.pdf",
                dpi=None,
                facecolor='w',
                edgecolor='w',
                orientation='portrait',
                papertype='a4',
                format='pdf',
                transparent=False,
                bbox_inches=None,
                pad_inches=0.1,
                frameon=None,
                metadata=None)
Esempio n. 51
0
def predict_by_lr_coef(test_feature, lr_coef):
    """通过模型文件(进行预测"""
    sigmod_func = np.frompyfunc(simoid, 1,
                                1)  # 可以对array每个元素 进行相应的函数 操作,1,1,表示一个输入,一个输出
    return sigmod_func(np.dot(test_feature, lr_coef))
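# (added sketch, not part of the original) since frompyfunc yields an object-dtype result,
# an equivalent fully-vectorized version could compute the sigmoid directly in NumPy:
#
#     def predict_by_lr_coef_vectorized(test_feature, lr_coef):
#         scores = np.dot(test_feature, lr_coef)
#         return 1.0 / (1.0 + np.exp(-scores))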
Esempio n. 52
0
import matplotlib.pyplot as plt
import numpy as np
import math
pi = math.pi
t = np.arange(0, 2 * pi, 0.001)
r = np.frompyfunc(round, 1, 1)
sin = np.frompyfunc(math.sin, 1, 1)
cos = np.frompyfunc(math.cos, 1, 1)
#y = sin(x)*(abs(cos(x)))**0.5/(sin(x)+1.4)-2*sin(x)+2
'''
y = 2*cos(t)-cos(2*t)
x = 2*sin(t)-sin(2*t)
'''
x = sin(t)
c = cos(t)
p = x**2
y = c + p**(1/3)
plt.plot(x, y, color='r', linewidth=9)
plt.fill(x, y, color='r')
plt.title("my heart for you")
ax = plt.subplot(111)
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.spines['bottom'].set_color('none')
ax.spines['left'].set_color('none')
plt.xlim(-1.2,1.2)
plt.xticks([])
plt.yticks([])
plt.show()
Esempio n. 53
0
    plt.tick_params(**CMS.axis_label_minor)

    ax0.xaxis.set_major_formatter(FormatStrFormatter('%d'))
    ax0.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
    ax0.xaxis.labelpad = 11
    #ax0.yaxis.labelpad = 20

    rplt.errorbar(plot_efficiency, xerr=True, emptybins=True, axes=ax0)

    ax0.set_xlim(x_limits)
    ax0.set_ylim([0.0, 1.1])

    #add fits
    x = numpy.linspace(fit_data.GetXmin(), fit_data.GetXmax(),
                       fit_data.GetNpx())
    function_data = frompyfunc(fit_data.Eval, 1, 1)
    plot(x, function_data(x), axes=ax0, color='red', linewidth=2)

    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)

    plt.xlabel(x_title, CMS.x_axis_title)
    plt.ylabel(y_title, CMS.y_axis_title)
    plt.title(r'e+jets, CMS Preliminary, $\sqrt{s}$ = 8 TeV', CMS.title)
    plt.legend(['data', 'fit'],
               numpoints=1,
               loc='lower right',
               prop=CMS.legend_properties)

    #add fit formulas
    ax0.text(0.2,
Esempio n. 54
0
def _maybe_convert(values, val_kind):
    if _need_convert(val_kind):
        conv = _get_converter(val_kind)
        conv = np.frompyfunc(conv, 1, 1)
        values = conv(values)
    return values
Esempio n. 55
0
def ebeida_sampling(
    sizeI,
    spacing,
    nPts,
    showIter,
    ftests=[],
    discount_factor=0.5,
    stop_method="density",
):

    # Setting properties of iteration
    ndim = len(sizeI)
    cellsize = spacing / np.sqrt(ndim)
    fgrid = eval("make_{}d_grid".format(ndim))
    dlat = eval("dlat{}".format(ndim)).ravel()

    gen_history = np.zeros(10)

    # Make grid size such that there is just one pt in each grid
    dcell = spacing / np.sqrt(ndim)

    # Make a grid and convert it into a nxD array
    s_cell = fgrid(sizeI, cellsize)
    s_cell = np.array([s_cell[i][:].flatten() for i in range(ndim)]).T
    grid = np.arange(s_cell.shape[0]).astype(int)

    # Thrown in a particular grid
    n_empty_cells = n_empty_cells0 = s_cell.shape[0]

    # Initialize Parameters
    if nPts == 0:
        nPts = n_empty_cells
    n_pts_created = 0
    n_pts_newly_created = 0
    pts = np.empty(shape=(1, ndim))
    # iter = 0

    # Start Iterative process
    pcl = PointCloud(pts, spacing)
    nn2 = NearestNeighbors(radius=spacing, algorithm="kd_tree", leaf_size=50)

    if ftests != []:
        for ftest in ftests:
            is_cell_uncovered = ftest.test_cells(s_cell, dcell)

            s_cell = s_cell[is_cell_uncovered, :]
            grid = grid[is_cell_uncovered]
            n_empty_cells = np.sum(s_cell.shape[0])

    if showIter:
        pbar = tqdm(total=nPts)

    if stop_method == 'density':
        fcontinue_criterion = lambda n_pts, n_empty: n_pts < nPts and n_empty > 0
    elif stop_method == 'maximal':
        fcontinue_criterion = lambda n_pts, n_empty: n_empty > 0
    else:
        raise RuntimeError(f'Unknown stopping criterion: {stop_method}')

    while fcontinue_criterion(n_pts_created, n_empty_cells):
        # Thrown darts in eligible grids

        ndarts = set_nDarts(nPts, n_pts_created, n_pts_newly_created,
                            n_empty_cells)
        if ndarts != s_cell.shape[0]:
            p = np.random.choice(range(s_cell.shape[0]), ndarts, replace=False)
        else:
            p = range(s_cell.shape[0])

        is_safe_to_continue = 1

        tempPts = s_cell[p, :] + dcell * np.random.rand(len(p), ndim)
        temp_grids = grid[p]

        if ftests != []:
            is_safe_with_prev_pts = np.ones(len(p), dtype=bool)
            for ftest in ftests:
                is_safe_with_prev_pts = is_safe_with_prev_pts * ftest.test_points(
                    tempPts)

            p = p[is_safe_with_prev_pts]
            tempPts = tempPts[is_safe_with_prev_pts, :]
            temp_grids = temp_grids[is_safe_with_prev_pts]

        is_safe_to_continue = p.size  # tempPts.shape[0]

        if is_safe_to_continue > 0 and n_pts_created > 0:
            is_safe_with_prev_pts = pcl.test_points(tempPts)
            is_safe_to_continue = np.sum(is_safe_with_prev_pts)

            p = p[is_safe_with_prev_pts]
            tempPts = tempPts[is_safe_with_prev_pts, :]
            temp_grids = temp_grids[is_safe_with_prev_pts]

        _, ind = np.unique(temp_grids, return_index=True)
        is_unlocked = np.isin(range(p.size), ind)
        is_safe_to_continue = np.sum(is_unlocked)
        p = p[is_unlocked]
        tempPts = tempPts[is_unlocked, :]
        temp_grids = temp_grids[is_unlocked]

        if is_safe_to_continue > 0:
            # find colliding pairs and leave only one of the pairs
            nn2.fit(tempPts)
            ind = nn2.radius_neighbors(tempPts, return_distance=False)

            is_eligible = np.frompyfunc(lambda i: (ind[i] < i).sum() == 0, 1,
                                        1)(np.arange(ind.size)).astype(bool)
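            # (added note) for each candidate point i, ind[i] holds the indices of all candidates
            # within `spacing`; a point is eligible only if none of its colliding neighbours has a
            # smaller index, so exactly one point of each colliding pair survives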

            n_pts_newly_created = np.sum(is_eligible)
            rejection_rate = 1 - n_pts_newly_created / ndarts

            gen_history = np.roll(gen_history, 1)
            gen_history[0] = n_pts_newly_created

            if n_pts_newly_created > 0:
                accepted_pts = tempPts[is_eligible, :]
                accepted_grids = temp_grids[is_eligible]

                is_grid_unmarked = ~np.isin(grid, accepted_grids)
                s_cell = s_cell[is_grid_unmarked, :]
                grid = grid[is_grid_unmarked]

                # Update quantities for next iterations
                n_empty_cells = s_cell.shape[0]
                if n_pts_created == 0:
                    pcl.update_points(accepted_pts)
                else:
                    pcl.append_points(accepted_pts)

                n_pts_created = pcl.points.shape[0]

                if showIter:
                    pbar.update(n_pts_newly_created)
                    # print('n_pts_created = ', n_pts_created, '/', nPts)

        is_safe_to_continue = s_cell.shape[0]

        # if is_safe_to_continue and n_pts_newly_created/nPts<0.0006:
        if is_safe_to_continue and n_pts_newly_created < 0.0006 * nPts:
            print("Splitting grids...")
            dcell = dcell / 2
            s_cell = (np.tile(s_cell, (1, 2**ndim)) + dlat * dcell).reshape(
                (-1, ndim))
            grid = np.repeat(grid, 2**ndim)
            n_empty_cells0 = np.sum(s_cell.shape[0])
            assert grid.size == n_empty_cells0

            if ftests != []:
                for ftest in ftests:
                    is_cell_uncovered = ftest.test_cells(s_cell, dcell)

                    s_cell = s_cell[is_cell_uncovered, :]
                    grid = grid[is_cell_uncovered]
                    n_empty_cells = np.sum(s_cell.shape[0])

            n_empty_cells0 = np.sum(s_cell.shape[0])
            is_cell_uncovered = pcl.test_cells(s_cell, dcell)

            s_cell = s_cell[is_cell_uncovered, :]
            grid = grid[is_cell_uncovered]
            n_empty_cells = n_empty_cells0 = np.sum(s_cell.shape[0])

    pts = pcl.points
    if stop_method == 'density' and pts.shape[0] > nPts:
        p = np.arange(pts.shape[0])
        p = np.random.choice(p, nPts, replace=False)
        pts = pts[p, :]

    if showIter:
        pbar.close()

    return pts
Esempio n. 56
0
def density2d(data,
              channels=[0, 1],
              bins=1024,
              gate_fraction=0.65,
              xscale='logicle',
              yscale='logicle',
              sigma=10.0,
              full_output=False):
    """
    Gate that preserves events in the region with highest density.

    Gate out all events in `data` but those near regions of highest
    density for the two specified channels.

    Parameters
    ----------
    data : FCSData or numpy array
        NxD flow cytometry data where N is the number of events and D is
        the number of parameters (aka channels).
    channels : list of int, list of str, optional
        Two channels on which to perform gating.
    bins : int or array_like or [int, int] or [array, array], optional
        Bins used for gating:

          - If None, use ``data.hist_bins`` to obtain bin edges for both
            axes. None is not allowed if ``data.hist_bins`` is not
            available.
          - If int, `bins` specifies the number of bins to use for both
            axes. If ``data.hist_bins`` exists, it will be used to generate
            a number `bins` of bins.
          - If array_like, `bins` directly specifies the bin edges to use
            for both axes.
          - If [int, int], each element of `bins` specifies the number of
            bins for each axis. If ``data.hist_bins`` exists, use it to
            generate ``bins[0]`` and ``bins[1]`` bin edges, respectively.
          - If [array, array], each element of `bins` directly specifies
            the bin edges to use for each axis.
          - Any combination of the above, such as [int, array], [None,
            int], or [array, int]. In this case, None indicates to generate
            bin edges using ``data.hist_bins`` as above, int indicates the
            number of bins to generate, and an array directly indicates the
            bin edges. Note that None is not allowed if ``data.hist_bins``
            does not exist.
    gate_fraction : float, optional
        Fraction of events to retain after gating. Should be between 0 and
        1, inclusive.
    xscale : str, optional
        Scale of the bins generated for the x axis, either ``linear``,
        ``log``, or ``logicle``. `xscale` is ignored if `bins` is an array
        or a list of arrays.
    yscale : str, optional
        Scale of the bins generated for the y axis, either ``linear``,
        ``log``, or ``logicle``. `yscale` is ignored if `bins` is an array
        or a list of arrays.
    sigma : scalar or sequence of scalars, optional
        Standard deviation for Gaussian kernel used by
        `scipy.ndimage.filters.gaussian_filter` to smooth 2D histogram
        into a density.
    full_output : bool, optional
        Flag specifying to return additional outputs. If true, the outputs
        are given as a namedtuple.

    Returns
    -------
    gated_data : FCSData or numpy array
        Gated flow cytometry data of the same format as `data`.
    mask : numpy array of bool, only if ``full_output==True``
        Boolean gate mask used to gate data such that ``gated_data =
        data[mask]``.
    contour : list of 2D numpy arrays, only if ``full_output==True``
        List of 2D numpy array(s) of x-y coordinates tracing out
        the edge of the gated region.

    Raises
    ------
    ValueError
        If more or less than 2 channels are specified.
    ValueError
        If `data` has less than 2 dimensions or less than 2 events.
    Exception
        If an unrecognized matplotlib Path code is encountered when
        attempting to generate contours.

    Notes
    -----
    The algorithm for gating based on density works as follows:

        1) Calculate 2D histogram of `data` in the specified channels.
        2) Map each event from `data` to its histogram bin (implicitly
           gating out any events which exist outside specified `bins`).
        3) Use `gate_fraction` to determine number of events to retain
           (rounded up). Only events which are not implicitly gated out
           are considered.
        4) Smooth 2D histogram using a 2D Gaussian filter.
        5) Normalize smoothed histogram to obtain valid probability mass
           function (PMF).
        6) Sort bins by probability.
        7) Accumulate events (starting with events belonging to bin with
           highest probability ("densest") and proceeding to events
           belonging to bins with lowest probability) until at least the
           desired number of events is achieved. While the algorithm
           attempts to get as close to `gate_fraction` fraction of events
           as possible, more events may be retained based on how many
           events fall into each histogram bin (since entire bins are
           retained at a time, not individual events).

    """

    # Extract channels in which to gate
    if len(channels) != 2:
        raise ValueError('2 channels should be specified')
    data_ch = data[:, channels]
    if data_ch.ndim == 1:
        data_ch = data_ch.reshape((-1, 1))

    # Check gating fraction
    if gate_fraction < 0 or gate_fraction > 1:
        raise ValueError('gate fraction should be between 0 and 1, inclusive')

    # Check dimensions
    if data_ch.ndim < 2:
        raise ValueError('data should have at least 2 dimensions')
    if data_ch.shape[0] <= 1:
        raise ValueError('data should have more than one event')

    # Build output namedtuple if necessary
    if full_output:
        Density2dGateOutput = collections.namedtuple(
            'Density2dGateOutput', ['gated_data', 'mask', 'contour'])

    # If ``data_ch.hist_bins()`` exists, obtain bin edges from it if
    # necessary.
    if hasattr(data_ch, 'hist_bins') and \
            hasattr(data_ch.hist_bins, '__call__'):
        # Check whether `bins` contains information for one or two axes
        if hasattr(bins, '__iter__') and len(bins) == 2:
            # `bins` contains separate information for both axes
            # If bins for the X axis is not an iterable, get bin edges from
            # ``data_ch.hist_bins()``.
            if not hasattr(bins[0], '__iter__'):
                bins[0] = data_ch.hist_bins(channels=0,
                                            nbins=bins[0],
                                            scale=xscale)
            # If bins for the Y axis is not an iterable, get bin edges from
            # ``data_ch.hist_bins()``.
            if not hasattr(bins[1], '__iter__'):
                bins[1] = data_ch.hist_bins(channels=1,
                                            nbins=bins[1],
                                            scale=yscale)
        else:
            # `bins` contains information for one axis, which will be used
            # twice.
            # If bins is not an iterable, get bin edges from
            # ``data_ch.hist_bins()``.
            if not hasattr(bins, '__iter__'):
                bins = [
                    data_ch.hist_bins(channels=0, nbins=bins, scale=xscale),
                    data_ch.hist_bins(channels=1, nbins=bins, scale=yscale)
                ]

    # Make 2D histogram
    H, xe, ye = np.histogram2d(data_ch[:, 0], data_ch[:, 1], bins=bins)

    # Map each event to its histogram bin by sorting events into a 2D array of
    # lists which mimics the histogram.
    #
    # Use np.digitize to calculate the histogram bin index for each event
    # given the histogram bin edges. Note that the index returned by
    # np.digitize is such that bins[i-1] <= x < bins[i], whereas indexing the
    # histogram will result in the following: hist[i,j] = bin corresponding to
    # xedges[i] <= x < xedges[i+1] and yedges[i] <= y < yedges[i+1].
    # Therefore, we need to subtract 1 from the np.digitize result to be able
    # to index into the appropriate bin in the histogram.
    event_indices = np.arange(data_ch.shape[0])
    x_bin_indices = np.digitize(data_ch[:, 0], bins=xe) - 1
    y_bin_indices = np.digitize(data_ch[:, 1], bins=ye) - 1

    # In the current version of numpy, there exists a disparity in how
    # np.histogram and np.digitize treat the rightmost bin edge (np.digitize
    # is not the strict inverse of np.histogram). Specifically, np.histogram
    # treats the rightmost bin interval as fully closed (rightmost bin edge is
    # included in rightmost bin), whereas np.digitize treats all bins as
    # half-open (you can specify which side is closed and which side is open;
    # `right` parameter). The expected behavior for this gating function is to
    # mimic np.histogram behavior, so we must reconcile this disparity.
    x_bin_indices[data_ch[:, 0] == xe[-1]] = len(xe) - 2
    y_bin_indices[data_ch[:, 1] == ye[-1]] = len(ye) - 2

    # Ignore (gate out) events which exist outside specified bins.
    # `np.digitize()-1` will assign events less than `bins` to bin "-1" and
    # events greater than `bins` to len(bins)-1.
    outlier_mask = ((x_bin_indices == -1) | (x_bin_indices == len(xe) - 1) |
                    (y_bin_indices == -1) | (y_bin_indices == len(ye) - 1))

    event_indices = event_indices[~outlier_mask]
    x_bin_indices = x_bin_indices[~outlier_mask]
    y_bin_indices = y_bin_indices[~outlier_mask]

    # Create a 2D array of lists mimicking the histogram to accumulate events
    # associated with each bin.
    filler = np.frompyfunc(lambda x: list(), 1, 1)
    H_events = np.empty_like(H, dtype=object)
    filler(H_events, H_events)
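    # (added note) the frompyfunc filler puts an independent, freshly-created list into every
    # cell; assigning a single list() to all cells at once (e.g. with ndarray.fill) would leave
    # every cell referencing the same shared object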

    for event_idx, x_bin_idx, y_bin_idx in \
            zip(event_indices, x_bin_indices, y_bin_indices):
        H_events[x_bin_idx, y_bin_idx].append(event_idx)

    # Determine number of events to keep. Only consider events which have not
    # been thrown out as outliers.
    n = int(np.ceil(gate_fraction * float(len(event_indices))))

    # n = 0 edge case (e.g. if gate_fraction = 0.0); incorrectly handled below
    if n == 0:
        mask = np.zeros(shape=data_ch.shape[0], dtype=bool)
        gated_data = data[mask]
        if full_output:
            return Density2dGateOutput(gated_data=gated_data,
                                       mask=mask,
                                       contour=[])
        else:
            return gated_data

    # Smooth 2D histogram
    sH = scipy.ndimage.filters.gaussian_filter(H,
                                               sigma=sigma,
                                               order=0,
                                               mode='constant',
                                               cval=0.0,
                                               truncate=6.0)

    # Normalize smoothed histogram to make it a valid probability mass function
    D = sH / np.sum(sH)

    # Sort bins by density
    vD = D.ravel()
    vH = H.ravel()
    sidx = np.argsort(vD)[::-1]
    svH = vH[sidx]  # linearized counts array sorted by density

    # Find minimum number of accepted bins needed to reach specified number
    # of events
    csvH = np.cumsum(svH)
    Nidx = np.nonzero(csvH >= n)[0][0]  # we want to include this index

    # Get indices of events to keep
    vH_events = H_events.ravel()
    accepted_indices = vH_events[sidx[:(Nidx + 1)]]
    accepted_indices = np.array([
        item  # flatten list of lists
        for sublist in accepted_indices for item in sublist
    ])
    accepted_indices = np.sort(accepted_indices)

    # Convert list of accepted indices to boolean mask array
    mask = np.zeros(shape=data.shape[0], dtype=bool)
    mask[accepted_indices] = True

    gated_data = data[mask]

    if full_output:
        # Use scikit-image to find the contour of the gated region
        #
        # To find the contour of the gated region, values in the 2D probability
        # mass function ``D`` are used to trace contours at the level of the
        # probability associated with the last accepted bin, ``vD[sidx[Nidx]]``.

        # find_contours() specifies contours as collections of row and column
        # indices into the density matrix. The row or column index may be
        # interpolated (i.e. non-integer) for greater precision.
        contours_ij = skimage.measure.find_contours(D, vD[sidx[Nidx]])

        # Map contours from indices into density matrix to histogram x and y
        # coordinate spaces (assume values in the density matrix are associated
        # with histogram bin centers).
        xc = (xe[:-1] + xe[1:]) / 2.0  # x-axis bin centers
        yc = (ye[:-1] + ye[1:]) / 2.0  # y-axis bin centers

        contours = [
            np.array([
                np.interp(contour_ij[:, 0], np.arange(len(xc)), xc),
                np.interp(contour_ij[:, 1], np.arange(len(yc)), yc)
            ]).T for contour_ij in contours_ij
        ]

        return Density2dGateOutput(gated_data=gated_data,
                                   mask=mask,
                                   contour=contours)
    else:
        return gated_data
Esempio n. 57
0
n_iter = 5

startepoch = pycdf.lib.datetime_to_epoch(datetime.datetime.now())
epochs = numpy.arange(startepoch, startepoch + n_epochs)
command = 'vepoch_to_datetime(epochs)'
setup = 'from __main__ import epochs, vepoch_to_datetime'

print('Tests of EPOCH')
#case 1: vectorize
vepoch_to_datetime = numpy.vectorize(pycdf.lib.epoch_to_datetime)
timing = timeit.timeit(command, setup, number=n_iter)
print('vectorize: {0} loops of {1} took {2} seconds.'.format(
    n_iter, n_epochs, timing))

#case 2: ufunc
vepoch_to_datetime = numpy.frompyfunc(pycdf.lib.epoch_to_datetime, 1, 1)
timing = timeit.timeit(command, setup, number=n_iter)
print('ufunc: {0} loops of {1} took {2} seconds.'.format(
    n_iter, n_epochs, timing))


#case 3: ndindex
def vepoch_to_datetime(inarray):
    outarray = numpy.empty(inarray.shape, dtype='O')
    for idx in numpy.ndindex(inarray.shape):
        outarray[idx] = pycdf.lib.epoch_to_datetime(inarray[idx])
    return outarray


timing = timeit.timeit(command, setup, number=n_iter)
print('ndindex: {0} loops of {1} took {2} seconds.'.format(
Esempio n. 58
0
 def _refresh_fired(self):
     xdata = linspace(0.001, 10, 10000)
     fneval = frompyfunc(lambda x: eval(self.expression), 1, 1)
     ydata = fneval(xdata)
     self.mfn.set(xdata=xdata, ydata=ydata)
     self.mfn.data_changed = True
Esempio n. 59
0
    return v**(2 * H) * covW_fun_aux(v / u)


def covWZ_fun(u, v):
    H_tilde = H + .5
    D = np.sqrt(2 * H) / H_tilde
    return rho * D * (u**H_tilde - (u - min(u, v))**H_tilde)


fWW = np.vectorize(
    lambda i, j: covW_fun(times_discretized[i], times_discretized[j]))
fZZ = np.vectorize(
    lambda i, j: min(times_discretized[i], times_discretized[j]))
fWZ = np.vectorize(
    lambda i, j: covWZ_fun(times_discretized[i], times_discretized[j]))
fWW_ufunc = np.frompyfunc(fWW, 2, 1)
fZZ_ufunc = np.frompyfunc(fZZ, 2, 1)
fWZ_ufunc = np.frompyfunc(fWZ, 2, 1)

integersNd = np.arange(N_d)
covWW = fWW_ufunc.outer(integersNd, integersNd)
covZZ = fZZ_ufunc.outer(integersNd, integersNd)
covWZ = fWZ_ufunc.outer(integersNd, integersNd)
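# (added note) .outer evaluates the wrapped two-argument function on every pair of indices,
# producing the full (N_d, N_d) covariance matrices; the explicit double loop below builds
# the same matrix element by element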

covWW2 = np.zeros((N_d, N_d))
for i in range(N_d):
    for j in range(N_d):
        covWW2[i, j] = fWW(i, j)

del covWW
Esempio n. 60
0
# timeit_vf: avg=0.325 sec, median=0.321 sec
# timeit_uf: avg=0.271 sec, median=0.268 sec
# timeit_np: avg=0.040 sec, median=0.040 sec

N = 1000000
l = np.random.rand(N)

#f = lambda x: x ** 3


def f(x):
    return x**3


vf = np.vectorize(f)
uf = np.frompyfunc(f, 1, 1)
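# (added note) np.vectorize and np.frompyfunc both still call f once per element in Python,
# and frompyfunc additionally returns an object-dtype array; the timeit_np figure at the top
# presumably comes from evaluating the expression directly on the array (roughly `l ** 3`),
# which is why it is an order of magnitude faster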


@tu.timeit
def timeit_f(l):
    for i in l:
        f(i)


@tu.timeit
def timeit_vf(l):
    vf(l)


@tu.timeit
def timeit_uf(l):