Example #1
    def events(self):
        if self.__events is None:
            # the indices of the channels within this cluster
            cidx = [channel.index() for channel in self]
            # the event data from which to extract
            data = self.__eventdata._data

            #select all events for this cluster
            eventmask = data['clid'] == self.__index
            #select first and last frame
            eventmask[0]  = True
            eventmask[-1] = True

            #create recarray that stores the events of this cluster
            if self.__model == dyk:                
                self.__events = numpy.recarray(shape = (eventmask.sum()),dtype = [('t', '<f8'), ('noch', '<i2'), ('chid', int), ('states', '|i1', (len(cidx), 8))])
            elif self.__model == deterministic:
                self.__events = numpy.recarray(shape = (eventmask.sum()),dtype = [('t', '<f8'), ('noch', '<i2'), ('chid', int), ('states', bool, (len(cidx),))])

            # copy time, chid and the state-column subspace into the new recarray
            self.__events['t']       = data[eventmask]['t']
            self.__events['chid']    = data[eventmask]['chid']
            self.__events['states']  = data[eventmask]['states'][:,self.__index,...]
            
            # cache the number of open channels         
            model =  self.__eventdata.model()
            self.__events['noch'] = model.open(self.__events).sum(-1)
            
        return self.__events
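The core pattern above (size a new recarray from a boolean event mask, then copy the selected columns field by field) can be shown in isolation. A minimal, self-contained sketch with made-up field names; the dyk/deterministic model specifics are left out:

    import numpy

    # toy event log: time, channel id, and a small per-channel state vector
    data = numpy.recarray(6, dtype=[('t', '<f8'), ('chid', int), ('states', '|i1', (3,))])
    data.t = numpy.linspace(0.0, 1.0, 6)
    data.chid = [0, 1, 0, 2, 1, 0]
    data.states = 0

    # select the events of channel 0, always keeping the first and last frame
    eventmask = data.chid == 0
    eventmask[0] = eventmask[-1] = True

    events = numpy.recarray(eventmask.sum(), dtype=data.dtype)
    events['t'] = data[eventmask]['t']
    events['chid'] = data[eventmask]['chid']
    events['states'] = data[eventmask]['states']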
Example #2
    def selectOnSharpeRatio(self, ls_symbols, top_n_equities=10):
        ''' Choose the best portfolio over the stock universe,
        according to their sharpe ratio'''
        #TODO: change this to a DataAccess utility --------------
        symbols, files = getAllFromCSV()
        datalength = len(recfromcsv(files[0])['close'])
        print('Datalength: {}'.format(datalength))
        #---------------------------------------------------------
        #Initiating data arrays
        closes = np.recarray((datalength,), dtype=[(symbol, 'float') for symbol in symbols])
        daily_ret = np.recarray((datalength - 1,), dtype=[(symbol, 'float') for symbol in symbols])
        average_returns = np.zeros(len(files))
        return_stdev = np.zeros(len(files))
        sharpe_ratios = np.zeros(len(files))
        cumulative_returns = np.recarray((datalength-1,), dtype=[(symbol, 'float') for symbol in symbols])

        # Here is the meat
        #TODO: data = dataobj.getData(ls_symbols)
        for i, symbol in enumerate(ls_symbols):
            data = recfromcsv(files[i])   # placeholder fetch; see the dataobj.getData() TODO above
            if len(data) != datalength:
                continue
            print('Processing {}'.format(files[i]))
            closes[symbols[i]] = data['close'][::-1]
            daily_ret[symbols[i]] = dailyReturns()
            # We can now compute:
            average_returns[i] = daily_ret[symbols[i]].mean()
            return_stdev[i] = daily_ret[symbols[i]].std()
            sharpe_ratios[i] = (average_returns[i] / return_stdev[i]) * np.sqrt(datalength)   # compare to course
            print('\tavg: {}, stdev: {}, sharpe ratio: {}'.format(average_returns[i], return_stdev[i], sharpe_ratios[i]))

        sorted_sharpe_indices = np.argsort(sharpe_ratios)[::-1][0:top_n_equities]
        #TODO: return a dict as {symbol: sharpe_ratio}, or a df with all 3 components
        return sorted_sharpe_indices
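Since the data-access layer above is still a TODO, here is a hedged, self-contained sketch of the per-symbol bookkeeping the routine is aiming at: one recarray field per symbol for closes and daily returns, then mean/std/Sharpe per field. Symbols, prices and the annualisation factor are purely illustrative:

    import numpy as np

    symbols = ['AAA', 'BBB']
    raw_closes = {'AAA': np.array([10.0, 10.5, 10.2, 10.8]),
                  'BBB': np.array([20.0, 19.5, 19.8, 20.4])}
    datalength = 4

    closes = np.recarray((datalength,), dtype=[(s, 'float') for s in symbols])
    daily_ret = np.recarray((datalength - 1,), dtype=[(s, 'float') for s in symbols])
    sharpe_ratios = np.zeros(len(symbols))

    for i, s in enumerate(symbols):
        closes[s] = raw_closes[s]
        daily_ret[s] = np.diff(raw_closes[s]) / raw_closes[s][:-1]
        sharpe_ratios[i] = daily_ret[s].mean() / daily_ret[s].std() * np.sqrt(252)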
Example #3
    def __init__(self, analyzer):
        n = analyzer.frames_in_flight

        fw, fh = analyzer.camera.frame_size
        c = analyzer.camera.channels

        rw, rh = analyzer.rectifier.image_size

        self.frames = numpy.recarray(n, [
            ("index", "u4"),
            ("timestamp", "f8"),
            ("image", "u1", (fh, fw, c)),
            ("image_f", "f4", (fh, fw, c)),
            ("table", analyzer.table_tracker.dtype),
            ("rectification", "f4", (rh, rw, c)),
            ("background", analyzer.background_analyzer.dtype),
            ("team_foosmen", analyzer.team_foosmen_analyzer.dtype, len(analyzer.table.teams)),
            ("ball", analyzer.ball_analyzer.dtype),
            ("rod", [
                ("%s_%s" % (rod.type.name, rod.team.name), analyzer.rod_analyzer[i].dtype) for i, rod in enumerate(analyzer.table.rods)
            ])
        ])

        self.background = numpy.recarray((), [
            ("color_mean", "f4", (rh, rw, c)),
            ("variance", "f8", (c, c)),
            ("q_estimation", "f4", (rh, rw))
        ])

        self.team_foosmen = numpy.recarray(len(analyzer.table.teams), [
            ("color_mean", "f4", (rh, rw, c)),
            ("variance", "f8", (c, c)),
        ])
Example #4
 def events(self):
     '''return a rec array with all events of this channel'''
     if self.__events is None:
         #the event data from where to extract
         data  = self.__eventdata._data
                     
         #select all events for this cluster
         eventmask = data['chid'] == self.__index
         
         # if there are no events return empty recarray
         if len(data) == 0:
             self.__events = numpy.recarray(shape = (0),dtype = [('t', '<f8'), ('states', self.__model.state_type())])
             return self.__events
             
         #select first and last frame
         eventmask[0]  = True
         eventmask[-1] = True
         
         #create recarray that stores the events of this channel
         # this works for DYK
         # self.__events = numpy.recarray(shape = (eventmask.sum()),dtype = [('t', '<f8'), ('states', '|i1', 8)])
         self.__events = numpy.recarray(shape = (eventmask.sum()),dtype = [('t', '<f8'), ('states', self.__model.state_type())])
         
         # copy time, chid and the state-column subspace into the new recarray
         self.__events['t']       = data[eventmask]['t']
         # this works for DYK
         #self.__events['states']  = data[eventmask]['states'][:,self.__index,:]
         self.__events['states']  = data[eventmask]['states'][:,self.clusterindex(),self.__index]
         
     return self.__events
Example #5
    def test_multiple_args_records(self): 
        pyfunc = foobar

        mystruct_dt = np.dtype([('p', np.float64),
                           ('row', np.float64),
                           ('col', np.float64)])
        mystruct = numpy_support.from_dtype(mystruct_dt)

        cres = compile_isolated(pyfunc, [mystruct[:], types.uint64, types.uint64],
                return_type=mystruct[:])
        cfunc = cres.entry_point

        st1 = np.recarray(3, dtype=mystruct_dt)
        st2 = np.recarray(3, dtype=mystruct_dt)

        st1.p = np.arange(st1.size) + 1
        st1.row = np.arange(st1.size) + 1
        st1.col = np.arange(st1.size) + 1

        st2.p = np.arange(st2.size) + 1
        st2.row = np.arange(st2.size) + 1
        st2.col = np.arange(st2.size) + 1

        test_fail_args = ((st1, -1, st2), (st1, st2, -1))
        
        # TypeError is for 2.6
        exc_type = OverflowError if sys.version_info >= (2, 7) else TypeError
        for a, b, c in test_fail_args:
            # assert inside the loop so each failing argument set is exercised
            with self.assertRaises(exc_type):
                cfunc(a, b, c)
Example #6
 def _get_subheaders(self):
     """retreive all subheaders and return list of subheader recarrays
     """
     subheaders = []
     header = self._header
     endianness = self.endianness
     dt = self._subhdrdtype
     if self.endianness is not native_code:
         dt = self._subhdrdtype.newbyteorder(self.endianness)
     if self._header['num_frames'] > 1:
         for item in self._mlist._mlist:
             if item[1] == 0:
                 break
             self.fileobj.seek(0)
             offset = (int(item[1])-1)*512
             self.fileobj.seek(offset)
             tmpdat = self.fileobj.read(512)
             sh = (np.recarray(shape=(), dtype=dt,
                               buf=tmpdat))
             subheaders.append(sh.copy())
     else:
         self.fileobj.seek(0)
         offset = (int(self._mlist._mlist[0][1])-1)*512
         self.fileobj.seek(offset)
         tmpdat = self.fileobj.read(512)
         sh = (np.recarray(shape=(), dtype=dt,
                           buf=tmpdat))
         subheaders.append(sh)
     return subheaders
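Both branches above build a 0-d recarray directly over a 512-byte slice read from the file, after adjusting the dtype's byte order. A minimal sketch of that buffer-backed construction, with a made-up little-endian layout standing in for the ECAT subheader dtype:

    import numpy as np

    dt = np.dtype([('a', '<i4'), ('b', '<f8')])
    # stand-in for fileobj.read(512): serialize one record to raw bytes
    tmpdat = np.array([(7, 3.5)], dtype=dt).tobytes()

    sh = np.recarray(shape=(), dtype=dt, buf=tmpdat)
    print(sh.a, sh.b)       # 7 3.5
    subheader = sh.copy()   # detach the record from the underlying buffer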
Example #7
    def test_structured_arrays(self):
        def check(arr, dtype, ndim, layout, aligned):
            ty = typeof(arr)
            self.assertIsInstance(ty, types.Array)
            self.assertEqual(ty.dtype, dtype)
            self.assertEqual(ty.ndim, ndim)
            self.assertEqual(ty.layout, layout)
            self.assertEqual(ty.aligned, aligned)

        dtype = np.dtype([('m', np.int32), ('n', 'S5')])
        rec_ty = numpy_support.from_struct_dtype(dtype)

        arr = np.empty(4, dtype=dtype)
        check(arr, rec_ty, 1, "C", False)
        arr = np.recarray(4, dtype=dtype)
        check(arr, rec_ty, 1, "C", False)

        dtype = np.dtype([('m', np.int32), ('n', 'S5')], align=True)
        rec_ty = numpy_support.from_struct_dtype(dtype)

        # On Numpy 1.6, align=True doesn't align the itemsize
        actual_aligned = numpy_support.version >= (1, 7)

        arr = np.empty(4, dtype=dtype)
        check(arr, rec_ty, 1, "C", actual_aligned)
        arr = np.recarray(4, dtype=dtype)
        check(arr, rec_ty, 1, "C", actual_aligned)
Example #8
def Xi2_line_ratios(obs_ratios, arxvPDR):
    '''Computes the Xi2 statistic given the observed lines and a PDR arxv.'''
    
    
    allData = numpy.recarray([],[('x', 'f8'),('y', 'f8'),('z', 'f8'),('t', 'f8'),('v', 'f8'),])
    
    models = {} 
    
    specStrs, codes = obs_ratios.species_and_codes()

    #collecting all the line intensities of the ratios involved in the observations (obs_ratio)
    #from the model database. Processing one Av at a time...
    for i, AvStr in enumerate(arxvPDR.radexDbs):
                
        Av = numpy.float64(AvStr)

        #array which will hold the grid points and the values for this Av
        data = numpy.recarray((arxvPDR.nMeshes), allData.dtype.descr)

        #getting the emissions for each line from the PDR database for all the models for the current Av
        for code in codes:
            models[code] = 10.0**arxvPDR.get_emissions_from_databases(line={'type':'radex-lvg', 'code':code}, Av_use=Av)

        #defining the array which will hold the Xi2 for all the models for this Av
        Xi2 = numpy.zeros(arxvPDR.nMeshes, 'f8')
        
        #compute the Xi2
        for obs_ratio in obs_ratios:
            
            #the line codes involved in this ratio
            code1, code2 = codes_from_ratio(obs_ratio)
            
            #the ratios for all the models at this Av for this particular line ratio
            model_ratio = models[code1] / models[code2] 
            
            #computing the Xi2
            f_o = obs_ratios[obs_ratio]['v']
            f_e = obs_ratios[obs_ratio]['e']
            f_m = model_ratio
            
            Xi2 += ((f_m - f_o)/f_e)**2.0
        #
        
        data.x = arxvPDR.grid_x
        data.y = arxvPDR.grid_y
        data.z = arxvPDR.grid_z
        data.t = Av
        data.v = Xi2

        allData = numpy.hstack((allData, data) )

    #removing the first entry (redundant ;;; .. todo:: fix this [low priority])
    allData = allData[1::]
    
    #filtering out the points which have Nans 
    inds_not_nan = numpy.where( numpy.isfinite(allData['v']) )

    return allData[inds_not_nan]
Example #9
    def test_usecase1(self):
        pyfunc = usecase1

        # This is an unaligned dtype
        mystruct_dt = numpy.dtype([('p', numpy.float64),
                           ('row', numpy.float64),
                           ('col', numpy.float64)])
        mystruct = numpy_support.from_dtype(mystruct_dt)

        cres = compile_isolated(pyfunc, (mystruct[:], mystruct[:]))
        cfunc = cres.entry_point

        st1 = numpy.recarray(3, dtype=mystruct_dt)
        st2 = numpy.recarray(3, dtype=mystruct_dt)

        st1.p = numpy.arange(st1.size) + 1
        st1.row = numpy.arange(st1.size) + 1
        st1.col = numpy.arange(st1.size) + 1

        st2.p = numpy.arange(st2.size) + 1
        st2.row = numpy.arange(st2.size) + 1
        st2.col = numpy.arange(st2.size) + 1

        expect1 = st1.copy()
        expect2 = st2.copy()

        got1 = expect1.copy()
        got2 = expect2.copy()

        pyfunc(expect1, expect2)
        cfunc(got1, got2)

        self.assertTrue(numpy.all(expect1 == got1))
        self.assertTrue(numpy.all(expect2 == got2))
Example #10
def addfield(mrecord, newfield, newfieldname=None):
    """Adds a new field to the masked record array, using `newfield` as data
and `newfieldname` as name. If `newfieldname` is None, the new field name is
set to 'fi', where `i` is the number of existing fields.
    """
    _data = mrecord._data
    _mask = mrecord._mask
    if newfieldname is None or newfieldname in reserved_fields:
        newfieldname = 'f%i' % len(_data.dtype)
    newfield = ma.array(newfield)
    # Get the new data ............
    # Create a new empty recarray
    newdtype = np.dtype(_data.dtype.descr + [(newfieldname, newfield.dtype)])
    newdata = recarray(_data.shape, newdtype)
    # Add the existing fields
    [newdata.setfield(_data.getfield(*f), *f)
         for f in _data.dtype.fields.values()]
    # Add the new field
    newdata.setfield(newfield._data, *newdata.dtype.fields[newfieldname])
    newdata = newdata.view(MaskedRecords)
    # Get the new mask .............
    # Create a new empty recarray
    newmdtype = np.dtype([(n, bool_) for n in newdtype.names])
    newmask = recarray(_data.shape, newmdtype)
    # Add the old masks
    [newmask.setfield(_mask.getfield(*f), *f)
         for f in _mask.dtype.fields.values()]
    # Add the mask of the new field
    newmask.setfield(getmaskarray(newfield),
                     *newmask.dtype.fields[newfieldname])
    newdata._mask = newmask
    return newdata
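The same growth step, minus the mask bookkeeping, can be sketched more plainly: build a widened dtype, allocate a recarray of the same shape, and copy the existing columns by name before filling the new one. A simplified take on the setfield loop above, with illustrative names:

    import numpy as np

    old = np.rec.fromarrays([np.arange(3), np.ones(3)], names='a,b')
    extra = np.array([10, 20, 30])

    newdtype = np.dtype(old.dtype.descr + [('c', extra.dtype)])
    new = np.recarray(old.shape, dtype=newdtype)
    for name in old.dtype.names:
        new[name] = old[name]
    new['c'] = extra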
Example #11
 def test_add_data_then_read(self):
     data0 = np.recarray((1,), dtype=[("f0", "<f8"), ("f1", "<f8"), ("f2", "<f8")])
     data0[0] = (1, 2, 3)
     data1 = np.recarray((1,), dtype=[("f0", "<f8"), ("f1", "<f8"), ("f2", "<f8")])
     data1[0] = (4, 5, 6)
     self.data.addData(data0)
     self.data.addData(data1)
     self.assert_data_in_backend(self.data, [[1, 2, 3], [4, 5, 6]])
Example #12
def create_neurohdf_file(filename, data):

    with closing(h5py.File(filename, 'w')) as hfile:
        hfile.attrs['neurohdf_version'] = '0.1'
        mcgroup = hfile.create_group("Microcircuit")
        mcgroup.attrs['node_type'] = 'irregular_dataset'
        vert = mcgroup.create_group("vertices")
        conn = mcgroup.create_group("connectivity")

        vert.create_dataset("id", data=data['vert']['id'])
        vert.create_dataset("location", data=data['vert']['location'])
        verttype=vert.create_dataset("type", data=data['vert']['type'])
        # create rec array with two columns, value and name
        my_dtype = np.dtype([('value', 'l'), ('name', h5py.new_vlen(str))])
        helpdict={VerticesTypeSkeletonRootNode['id']: VerticesTypeSkeletonRootNode['name'],
                  VerticesTypeSkeletonNode['id']: VerticesTypeSkeletonNode['name'],
                  VerticesTypeConnectorNode['id']: VerticesTypeConnectorNode['name']
        }
        arr=np.recarray( len(helpdict), dtype=my_dtype )
        for i,kv in enumerate(helpdict.items()):
            arr[i][0] = kv[0]
            arr[i][1] = kv[1]
        verttype.attrs['value_name']=arr

        vert.create_dataset("confidence", data=data['vert']['confidence'])
        vert.create_dataset("userid", data=data['vert']['userid'])
        vert.create_dataset("radius", data=data['vert']['radius'])
        vert.create_dataset("skeletonid", data=data['vert']['skeletonid'])
        vert.create_dataset("creation_time", data=data['vert']['creation_time'])
        vert.create_dataset("modification_time", data=data['vert']['modification_time'])

        conn.create_dataset("id", data=data['conn']['id'])
        if data['conn'].has_key('type'):
            conntype=conn.create_dataset("type", data=data['conn']['type'])
            helpdict={ConnectivityNeurite['id']: ConnectivityNeurite['name'],
                      ConnectivityPresynaptic['id']: ConnectivityPresynaptic['name'],
                      ConnectivityPostsynaptic['id']: ConnectivityPostsynaptic['name']
            }
            arr=np.recarray( len(helpdict), dtype=my_dtype )
            for i,kv in enumerate(helpdict.items()):
                arr[i][0] = kv[0]
                arr[i][1] = kv[1]
            conntype.attrs['value_name']=arr

        if data['conn'].has_key('skeletonid'):
            conn.create_dataset("skeletonid", data=data['conn']['skeletonid'])

        if data.has_key('meta'):
            metadata=mcgroup.create_group('metadata')
            # create recarray with two columns, skeletonid and string
            my_dtype = np.dtype([('skeletonid', 'l'), ('name', h5py.new_vlen(str))])
            arr=np.recarray( len(data['meta']), dtype=my_dtype )
            for i,kv in enumerate(data['meta'].items()):
                arr[i][0] = kv[0]
                arr[i][1] = kv[1]

            metadata.create_dataset('skeleton_name', data=arr )
Example #13
 def __init__(self, stid, nlat, elon, elev):
     self.stid  = stid
     self.nlat  = nlat
     self.elon  = elon
     self.elev  = elev
     # Measured data
     self.datat = np.recarray((NPTSt,), dtype={"names": ("flux", "sun_alt", "moon_phase"),
                                               "formats": (np.int64, np.float64, np.float64)})
     self.datap = np.recarray((NPTSp,), dtype={"names": ("flux", "sun_alt", "moon_phase"),
                                               "formats": (np.int64, np.float64, np.float64)})
Example #14
def make_polynomial_psf_params(ntrain, nvalidate, nvisualize):
    """ Make training/testing data for PSF with params varying as polynomials.
    """
    bd = galsim.BaseDeviate(5772156649+314159)
    ud = galsim.UniformDeviate(bd)

    training_data = np.recarray((ntrain,), dtype=star_type)
    validate_data = np.recarray((nvalidate,), dtype=star_type)

    # Make randomish Chebyshev polynomial coefficients
    # 5 Different arrays (hlr, g1, g2, u0, v0), and up to 3rd order in each of x and y.
    coefs = np.empty((4, 4, 5), dtype=float)
    for (i, j, k), _ in np.ndenumerate(coefs):
        coefs[i, j, k] = 2*ud() - 1.0

    for i in range(ntrain):
        u = ud()
        v = ud()
        flux = ud()*50+100
        vals = np.polynomial.chebyshev.chebval2d(u, v, coefs)/6  # range is [-0.5, 0.5]
        hlr = vals[0] * 0.1 + 0.35
        g1 = vals[1] * 0.1
        g2 = vals[2] * 0.1
        u0 = vals[3]
        v0 = vals[4]
        training_data[i] = (u, v, hlr, g1, g2, u0, v0, flux)

    for i in range(nvalidate):
        u = ud()*0.5 + 0.25
        v = ud()*0.5 + 0.25
        flux = 1.0
        vals = np.polynomial.chebyshev.chebval2d(u, v, coefs)/6  # range is [-0.5, 0.5]
        hlr = vals[0] * 0.1 + 0.35
        g1 = vals[1] * 0.1
        g2 = vals[2] * 0.1
        u0 = vals[3]
        v0 = vals[4]
        validate_data[i] = (u, v, hlr, g1, g2, u0, v0, flux)

    vis_data = np.recarray((nvisualize*nvisualize), dtype=star_type)
    u = v = np.linspace(0, 1, nvisualize)
    u, v = np.meshgrid(u, v)
    for i, (u1, v1) in enumerate(zip(u.ravel(), v.ravel())):
        vals = np.polynomial.chebyshev.chebval2d(u1, v1, coefs)/6  # range is [-0.5, 0.5]
        hlr = vals[0] * 0.1 + 0.35
        g1 = vals[1] * 0.1
        g2 = vals[2] * 0.1
        u0 = vals[3]
        v0 = vals[4]
        vis_data[i] = (u1, v1, hlr, g1, g2, u0, v0, 1.0)

    return training_data, validate_data, vis_data.reshape((nvisualize, nvisualize))
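star_type is defined elsewhere in the test module. A plausible stand-in (purely an assumption, matching the (u, v, hlr, g1, g2, u0, v0, flux) tuples assigned above) and the row-by-row tuple assignment:

    import numpy as np

    star_type = np.dtype([('u', float), ('v', float), ('hlr', float),
                          ('g1', float), ('g2', float),
                          ('u0', float), ('v0', float), ('flux', float)])

    stars = np.recarray((2,), dtype=star_type)
    stars[0] = (0.1, 0.2, 0.35, 0.01, -0.02, 0.0, 0.0, 120.0)
    stars[1] = (0.6, 0.7, 0.40, 0.00, 0.03, 0.1, -0.1, 130.0)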
Example #15
    def test_save_results(self):
        # test for 1d
        # test for 2d
        # test for 3d
        # test for very large
        
        nr_experiments = 10000
        experiments = np.recarray((nr_experiments,),
                               dtype=[('x', float), ('y', float)])
        outcome_a = np.random.rand(nr_experiments,1)
        
        results = (experiments, {'a': outcome_a})
    
        fn = u'../data/test.tar.gz'
        
        save_results(results, fn)
        os.remove(fn)
#         ema_logging.info('1d saved successfully')
        
        nr_experiments = 10000
        nr_timesteps = 100
        experiments = np.recarray((nr_experiments,),
                               dtype=[('x', float), ('y', float)])
        outcome_a = np.zeros((nr_experiments,nr_timesteps))
        
        results = (experiments, {'a': outcome_a})
        save_results(results, fn)
        os.remove(fn)
#         ema_logging.info('2d saved successfully')
     
     
        nr_experiments = 10000
        nr_timesteps = 100
        nr_replications = 10
        experiments = np.recarray((nr_experiments,),
                               dtype=[('x', float), ('y', float)])
        outcome_a = np.zeros((nr_experiments,nr_timesteps,nr_replications))
         
        results = (experiments, {'a': outcome_a})
        save_results(results, fn)
        os.remove(fn)
#         ema_logging.info('3d saved successfully')
        
        nr_experiments = 500000
        nr_timesteps = 100
        experiments = np.recarray((nr_experiments,),
                               dtype=[('x', float), ('y', float)])
        outcome_a = np.zeros((nr_experiments,nr_timesteps))
        
        results = (experiments, {'a': outcome_a})
        save_results(results, fn)
        os.remove(fn)
Example #16
    def test_record_write_2d_array(self):
        """
        Test writing to a 2D array within a structured type
        """
        nbval = np.recarray(1, dtype=recordwith2darray)
        nbrecord = numpy_support.from_dtype(recordwith2darray)
        cfunc = self.get_cfunc(record_write_2d_array, (nbrecord,))
        cfunc(nbval[0])

        expected = np.recarray(1, dtype=recordwith2darray)
        expected[0].i = 3
        expected[0].j[:] = np.asarray([5.0, 6.0, 7.0, 8.0, 9.0, 10.0], np.float32).reshape(3, 2)
        np.testing.assert_equal(expected, nbval)
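recordwith2darray comes from the surrounding Numba test module. A plausible stand-in (an assumption, not the actual fixture) with an int field plus a (3, 2) float32 sub-array, exercised the same way as above:

    import numpy as np

    recordwith2darray = np.dtype([('i', np.int32), ('j', np.float32, (3, 2))])

    rec = np.recarray(1, dtype=recordwith2darray)
    rec[0].i = 3
    rec[0].j[:] = np.asarray([5.0, 6.0, 7.0, 8.0, 9.0, 10.0], np.float32).reshape(3, 2)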
Example #17
    def test_record_write_array(self):
        '''
        Testing writing to a 1D array within a structured type
        '''
        nbval = np.recarray(1, dtype=recordwitharray)
        nbrecord = numpy_support.from_dtype(recordwitharray)
        cfunc = self.get_cfunc(record_write_array, (nbrecord,))
        cfunc(nbval[0])

        expected = np.recarray(1, dtype=recordwitharray)
        expected[0].g = 2
        expected[0].h[0] = 3.0
        expected[0].h[1] = 4.0
        np.testing.assert_equal(expected, nbval)
Example #18
    def _load(self, maxentries=None):
        self._build_index(maxentries)
        incdict, cumdict = self._set_entries()
        if incdict is None and cumdict is None:
            return
        totim = []
        for ts, sp, seekpoint in self.idx_map:
            tinc, tcum = self._get_sp(ts, sp, seekpoint)
            for entry in self.entries:
                incdict[entry].append(tinc[entry])
                cumdict[entry].append(tcum[entry])

            # Get the time for this record
            seekpoint = self._seek_to_string('TIME SUMMARY AT END')
            tslen, sptim, tt = self._get_totim(ts, sp, seekpoint)
            totim.append(tt)

        # get kstp and kper
        idx_array = np.array(self.idx_map)

        # build dtype for recarray
        dtype_tups = [('totim', np.float32), ("time_step", np.int32),
                      ("stress_period", np.int32)]
        for entry in self.entries:
            dtype_tups.append((entry, np.float32))
        dtype = np.dtype(dtype_tups)

        # create recarray
        nentries = len(incdict[entry])
        self.inc = np.recarray(shape=(nentries,), dtype=dtype)
        self.cum = np.recarray(shape=(nentries,), dtype=dtype)

        # fill each column of the recarray
        for entry in self.entries:
            self.inc[entry] = incdict[entry]
            self.cum[entry] = cumdict[entry]

        # fill the totim, time_step, and stress_period columns for the
        # incremental and cumulative recarrays (zero-based kstp,kper)
        self.inc['totim'] = np.array(totim)[:]
        self.inc["time_step"] = idx_array[:, 0] - 1
        self.inc["stress_period"] = idx_array[:, 1] - 1

        self.cum['totim'] = np.array(totim)[:]
        self.cum["time_step"] = idx_array[:, 0] - 1
        self.cum["stress_period"] = idx_array[:, 1] - 1

        return
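The assembly at the end of _load reduces to: build a dtype from (name, type) tuples, allocate matching recarrays, and assign whole columns from the per-entry lists. The same steps in isolation, with invented budget entries:

    import numpy as np

    entries = ['STORAGE', 'WELLS']
    incdict = {'STORAGE': [1.0, 2.0, 3.0], 'WELLS': [0.5, 0.25, 0.1]}
    totim = [10.0, 20.0, 30.0]

    dtype_tups = [('totim', np.float32)] + [(e, np.float32) for e in entries]
    inc = np.recarray(shape=(len(totim),), dtype=np.dtype(dtype_tups))
    for e in entries:
        inc[e] = incdict[e]
    inc['totim'] = totim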
Example #19
 def for_shape(self, shape):
     dtype = [(n,t) for n, t in self._stats_fields]
     data = np.recarray(shape, dtype=dtype)
     data[:] = 0
     stats = Statistics(data=data)
     stats._pointer = 0
     return stats
Example #20
    def __new__(cls, filename):
        """Create a new instance. Numpy array subclasses use this
        method instead of __init__ for initialization.

        """
        headerDict = cls._readHeader(filename)

        noindex = cls._load(filename, headerDict)
        index = np.recarray(shape=noindex.shape, dtype=cls.row)

        for el in cls._raw_row[2:]:
            key = el[0]
            index.__setattr__(key, noindex.__getattribute__(key))
            continue

        index.id = np.arange(len(noindex))
        try:
            index.position = cls._computePosition(index.grid, headerDict)
        except:
            index.position = np.nan
            pass

        obj = index.view(cls)

        # Set the attributes on the snapshot
        for headerField in headerDict:
            setattr(obj, headerField, headerDict[headerField])
            continue

        return obj
Example #21
def storeStageData(stage_file, masked_image_file):
    ## read motor data from csv
    with open(stage_file) as fid:
        reader = csv.reader(fid)
        data = [line for line in reader]
    
    # the csv must have more than one line (the header); otherwise it is an empty file
    if len(data)<=1:
        with tables.File(masked_image_file, 'r+') as fid:
            dtype = [('real_time', int), ('stage_time', int), ('stage_x', float), ('stage_y', float)]
            fid.create_table('/', 'stage_data', obj = np.recarray(0, dtype))
            return

    #import pdb
    #pdb.set_trace()

    #filter, check and store the data into a recarray
    header, data = _getHeader(data)
    csv_dict =  _data2dict(header, data)
    stage_recarray = _dict2recarray(csv_dict)

    with tables.File(masked_image_file, 'r+') as mask_fid:
        if '/stage_data' in mask_fid: mask_fid.remove_node('/', 'stage_data')
        mask_fid.create_table('/', 'stage_data', obj = stage_recarray)
    
    return csv_dict
Example #22
 def as_recarray(self):
     """ Convert into numpy recordarray """
     dtype = [(k, v.dtype) for k, v in self.__dict__.iteritems()]
     # all stored arrays share the same length; use any one of them for the shape
     R = numpy.recarray(len(next(iter(self.__dict__.values()))), dtype=dtype)
     for key in self.__dict__:
         R[key] = self.__dict__[key]
     return R
Example #23
 def _compute_asset_lifetimes(self):
     """
     Compute and cache a recarray of asset lifetimes.
     """
     equities_cols = self.equities.c
     buf = np.array(
         tuple(
             sa.select((
                 equities_cols.sid,
                 equities_cols.start_date,
                 equities_cols.end_date,
             )).execute(),
         ), dtype='<f8',  # use doubles so we get NaNs
     )
     lifetimes = np.recarray(
         buf=buf,
         shape=(len(buf),),
         dtype=[
             ('sid', '<f8'),
             ('start', '<f8'),
             ('end', '<f8')
         ],
     )
     start = lifetimes.start
     end = lifetimes.end
     start[np.isnan(start)] = 0  # convert missing starts to 0
     end[np.isnan(end)] = np.iinfo(int).max  # convert missing end to INTMAX
     # Cast the results back down to int.
     return lifetimes.astype([
         ('sid', '<i8'),
         ('start', '<i8'),
         ('end', '<i8'),
     ])
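The trick here is to stage integer columns as doubles so missing values can be NaN, wrap the flat buffer as a recarray, patch the NaNs in place, and only then cast back to integer fields. A compact sketch of just that conversion, with invented rows and sentinels:

    import numpy as np

    buf = np.array([(1.0, 100.0, np.nan),
                    (2.0, np.nan, 200.0)], dtype='<f8')   # rows of (sid, start, end)
    lifetimes = np.recarray(buf=buf, shape=(len(buf),),
                            dtype=[('sid', '<f8'), ('start', '<f8'), ('end', '<f8')])

    lifetimes.start[np.isnan(lifetimes.start)] = 0
    lifetimes.end[np.isnan(lifetimes.end)] = 2 ** 62   # large sentinel exactly representable as float64

    out = lifetimes.astype([('sid', '<i8'), ('start', '<i8'), ('end', '<i8')])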
Example #24
 def test_mlist(self):
     fid = open(self.example_file, 'rb')
     hdr = self.header_class.from_fileobj(fid)
     mlist =  self.mlist_class(fid, hdr)
     fid.seek(0)
     fid.seek(512)
     dat=fid.read(128*32)
     dt = np.dtype([('matlist',np.int32)])
     dt = dt.newbyteorder('>')
     mats = np.recarray(shape=(32,4), dtype=dt,  buf=dat)
     fid.close()
     #tests
     assert_true(mats['matlist'][0,0] +  mats['matlist'][0,3] == 31)
     assert_true(mlist.get_frame_order()[0][0] == 0)
     assert_true(mlist.get_frame_order()[0][1] == 16842758.0)
     # test badly ordered mlist
     badordermlist = mlist
     badordermlist._mlist = np.array([[  1.68427540e+07,   3.00000000e+00,
                                         1.20350000e+04,   1.00000000e+00],
                                      [  1.68427530e+07,   1.20360000e+04,
                                         2.40680000e+04,   1.00000000e+00],
                                      [  1.68427550e+07,   2.40690000e+04,
                                         3.61010000e+04,   1.00000000e+00],
                                      [  1.68427560e+07,   3.61020000e+04,
                                         4.81340000e+04,   1.00000000e+00],
                                      [  1.68427570e+07,   4.81350000e+04,
                                         6.01670000e+04,   1.00000000e+00],
                                      [  1.68427580e+07,   6.01680000e+04,
                                         7.22000000e+04,   1.00000000e+00]])
     assert_true(badordermlist.get_frame_order()[0][0] == 1)
Example #25
    def _allocate_output(self, windows, shape):
        """
        Allocate an output array whose rows should be passed to `self.compute`.

        The resulting array must have a shape of ``shape``.

        If we have standard outputs (i.e. self.outputs is NotSpecified), the
        default is an empty ndarray whose dtype is ``self.dtype``.

        If we have an outputs tuple, the default is an empty recarray with
        ``self.outputs`` as field names. Each field will have dtype
        ``self.dtype``.

        This can be overridden to control the kind of array constructed
        (e.g. to produce a LabelArray instead of an ndarray).
        """
        missing_value = self.missing_value
        outputs = self.outputs
        if outputs is not NotSpecified:
            out = recarray(
                shape,
                formats=[self.dtype.str] * len(outputs),
                names=outputs,
            )
            out[:] = missing_value
        else:
            out = full(shape, missing_value, dtype=self.dtype)
        return out
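When there is no pre-built dtype, recarray can assemble one from parallel formats and names sequences, as done above. A tiny sketch of that keyword form (field names and the missing value are illustrative):

    from numpy import nan, recarray

    outputs = ('alpha', 'beta')
    out = recarray((5,), formats=['<f8'] * len(outputs), names=outputs)
    out[:] = nan   # a scalar assigned to a structured array fills every field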
Example #26
def CfxCentreLineSnapshot(filename):
    """Factory function wrapping a CFX snapshot.
    
    Load the data with:
    >>> snap = CfxSnapshot(filename)

    Fields are constructed from the header line.
    """
    (__raw_row, fieldUnits) = parseHeader(filename, AllData=True)
    __raw_row = [('id', int),] + __raw_row
    fieldUnits['id'] = 1
    
                 # ('position', float, (3,)),
                 # ('strain_rate', float),
                 # ('speed', float),
                 # ('velocity', float, (3,)),
                 # ('wall_shear', float, (4,))]

    __readable_row = np.dtype(__raw_row[1:])
    row = np.dtype(__raw_row)
    
    noindex = np.genfromtxt(filename, skip_header=findStart(filename, AllData=True)+2,
                           delimiter=',',
                           dtype=__readable_row).view(np.recarray)
    index = np.recarray(shape=noindex.shape, dtype=row)
    index.id = np.arange(len(noindex))
    for el in __raw_row[1:]:
        key = el[0]
        index.__setattr__(key, U.convert(noindex.__getattribute__(key), fieldUnits[key], hlbUnits[key]))
        continue
    
    return index
Example #27
def tree_to_recarray_py(trees, branches=None,
                        use_cache=False, cache_size=1000000,
                        include_weight=False,
                        weight_name='weight',
                        weight_dtype='f4'):
    """
    Convert a tree or a list of trees into a numpy.recarray
    with fields corresponding to the tree branches

    (the slow pure-Python way...)
    """
    if not isinstance(trees, (list, tuple)):
        trees = [trees]
    trees = [asrootpy(tree) for tree in trees]
    # if branches is None then select only branches with basic types
    # i.e. no vectors or other special objects
    tree = trees[0]
    _branches = {}
    if branches is None:
        branches = []
        for name, value in tree.buffer.items():
            if isinstance(value, Variable):
                _branches[name] = value
                branches.append(name)
    else:
        if len(set(branches)) != len(branches):
            raise ValueError("branches contains duplicates")
        for branch in branches:
            if branch not in tree.buffer:
                raise ValueError("Branch %s does not exist in tree" % branch)
            value = tree.buffer[branch]
            if not isinstance(value, Variable):
                raise TypeError("Branch %s is not a basic type: %s" %
                                (branch, type(value)))
            _branches[branch] = value
    if not _branches:
        return None
    dtype = [(name, convert('ROOTCODE', 'NUMPY', _branches[name].type))
             for name in branches]
    if include_weight:
        if weight_name not in _branches:
            dtype.append((weight_name, weight_dtype))
        else:
            raise ValueError("Weight name '%s' conflicts "
                             "with another field name" % weight_name)
    total_entries = sum([tree.GetEntries() for tree in trees])
    array = np.recarray(shape=(total_entries,), dtype=dtype)
    i = 0
    for tree in trees:
        tree.use_cache(use_cache, cache_size=cache_size, learn_entries=1)
        if use_cache:
            tree.always_read(branches)
        tree_weight = tree.GetWeight()
        for entry in tree:
            for j, branch in enumerate(branches):
                array[i][j] = entry[branch].value
            if include_weight:
                array[i][-1] = tree_weight
            i += 1
    return array
Example #28
    def test_get_data_transpose(self):
        data_to_add = np.recarray((2,), dtype=[("f0", "<f8"), ("f1", "<f8"), ("f2", "<f8")])
        data_to_add[0] = (1, 2, 3)
        data_to_add[1] = (4, 5, 6)
        self.data.addData(data_to_add)

        self.assertRaises(RuntimeError, self.data.getData, None, 0, True, None)
Example #29
    def markers(self):
        """
        Return the list of markers in struct-array form.

        What this produces should be interpreted by hdf5storage as a struct
        array. We represent the list of n items as a dict where each key is a
        list of n items.
        """
        ret_list = self._instances[:] + self._markers[:]
        for name, start_times in self._marker_name_to_start.items():
            for start_time in start_times:
                ret_list.append({
                    'type': 'Marker',
                    'name': name,
                    'times': [start_time],
                })
        def time_ordering(item):    # pylint:disable=missing-docstring
            if item['times'][0] < 0:
                return item['times'][-1]
            else:
                return item['times'][0]
        ret_sorted = sorted(ret_list, key=time_ordering)
        ret_rec = np.recarray((len(ret_sorted),),
                              dtype=[('type', 'O', (1,1)),
                                     ('name', 'O', (1,1)),
                                     ('times', 'O')])
        for i, x in enumerate(ret_sorted):
            ret_rec[i]['name'][0] = np.asarray(x['name'], dtype=np.string_)
            ret_rec[i]['times'] = np.asarray(x['times'])
            ret_rec[i]['type'][0] = np.asarray(x['type'], dtype=np.string_)
        return ret_rec
Example #30
    def test_multiple_args_records(self): 
        pyfunc = foobar

        mystruct_dt = np.dtype([('p', np.float64),
                           ('row', np.float64),
                           ('col', np.float64)])
        mystruct = numpy_support.from_dtype(mystruct_dt)

        cres = compile_isolated(pyfunc, [mystruct[:], types.uint64, types.uint64],
                                return_type=mystruct[:])
        cfunc = cres.entry_point

        st1 = np.recarray(3, dtype=mystruct_dt)

        st1.p = np.arange(st1.size) + 1
        st1.row = np.arange(st1.size) + 1
        st1.col = np.arange(st1.size) + 1

        old_refcnt_st1 = sys.getrefcount(st1)

        test_fail_args = ((st1, -1, 1), (st1, 1, -1))

        # TypeError is for 2.6
        exc_type = OverflowError if sys.version_info >= (2, 7) else TypeError
        for a, b, c in test_fail_args:
            with self.assertRaises(exc_type):
                cfunc(a, b, c)

        del test_fail_args, a, b, c
        gc.collect()
        self.assertEqual(sys.getrefcount(st1), old_refcnt_st1)
Example #31
def save_records(hfile, where, data):
    """Save record array-like data to HDF5.

    Parameters
    ----------
    hfile: h5py.File
        Opened HDF5 file object.
    where: str
        Dataset name.
    data: Union[pd.DataFrame, np.ndarray]
        The data to write.

    Notes
    -----
    When saving a DataFrame, the index information will be lost.

    """
    original_type = str(type(data))

    if isinstance(data, pd.DataFrame):
        data = data.to_records(index=False)
    if not isinstance(data, np.recarray):
        data = np.rec.array(data)

    dtype = []
    utf8_encoded = set()
    json_encoded = set()

    for name in data.dtype.names:
        this_dtype = data[name].dtype
        if this_dtype.itemsize == 0:
            this_dtype = np.dtype('|{}1'.format(this_dtype.char))

        if this_dtype == object or this_dtype.char == "U":
            dtype.append((name, "|S{}".format(maxlen(data[name]))))
            utf8_encoded.add(name)
        else:
            dtype.append((name, this_dtype))

    sanitized = np.recarray(data.shape, dtype=dtype)

    for i, (name, _) in enumerate(dtype):
        if name in utf8_encoded:
            try:
                sanitized[name] = vencode(data[name])
            except TypeError:  # try dumping with JSON (for list/dict types)
                json_data = [json.dumps(col).encode() for col in data[name]]

                # We have to change the dtype which requires copying the array.
                # Maybe there is a better way to detect if something is JSON-
                # encodable earlier on?
                dtype[i] = (name, "|S{}".format(maxlen(json_data)))
                sanitized = sanitized.astype(dtype)

                sanitized[name] = json_data
                utf8_encoded.remove(name)
                json_encoded.add(name)
        else:
            sanitized[name] = data[name]

    hfile[where] = sanitized
    hfile[where].attrs["tabular"] = True
    hfile[where].attrs["utf8_encoded_fields"] = json.dumps(list(utf8_encoded))
    hfile[where].attrs["json_encoded_fields"] = json.dumps(list(json_encoded))
    hfile[where].attrs["original_type"] = original_type
Example #32
def ohmi_envelope(ring, refpts=None, orbit=None, keep_lattice=False):
    """
    Calculate the equilibrium beam envelope in a
    circular accelerator using Ohmi's beam envelope formalism [1]

    emit0, beamdata, emit = ohmi_envelope(ring[, refpts])

    PARAMETERS
        ring            Lattice object.
        refpts=None     elements at which data is returned. It can be:
                        1) an integer in the range [-len(ring), len(ring)-1]
                           selecting the element according to python indexing
                           rules. As a special case, len(ring) is allowed and
                           refers to the end of the last element,
                        2) an ordered list of such integers without duplicates,
                        3) a numpy array of booleans of maximum length
                           len(ring)+1, where selected elements are True.

    KEYWORDS
        orbit=None          Avoids looking for the closed orbit if it is
                            already known ((6,) array)
        keep_lattice=False  Assume no lattice change since the previous
                            tracking

    OUTPUT
        emit0               emittance data at the start/end of the ring
        beamdata            beam parameters at the start of the ring
        emit                emittance data at the points referred to by refpts,
                            if refpts is None an empty structure is returned.

        emit is a record array with fields:
        r66                 (6, 6) equilibrium envelope matrix R
        r44                 (4, 4) betatron emittance matrix (dpp = 0)
        m66                 (6, 6) transfer matrix from the start of the ring
        orbit6              (6,) closed orbit
        emitXY              (2,) betatron emittance projected on xxp and yyp
        emitXYZ             (3,) 6x6 emittance projected on xxp, yyp, ldp

        beamdata is a record array with fields:
        tunes               tunes of the 3 normal modes
        damping_rates       damping rates of the 3 normal modes
        mode_matrices       R-matrices of the 3 normal modes
        mode_emittances     equilibrium emittances of the 3 normal modes

        Field values can be obtained with either
        emit['r66']    or
        emit.r66

    REFERENCES
        [1] K.Ohmi et al. Phys.Rev.E. Vol.49. (1994)
    """
    def process(r66):
        # projections on xx', zz', ldp
        emit3sq = numpy.array([det(r66[s, s]) for s in _submat])
        # Prevent from unrealistic negative values of the determinant
        emit3 = numpy.sqrt(numpy.maximum(emit3sq, 0.0))
        # Emittance cut for dpp=0
        if emit3[0] < 1.E-13:  # No equilibrium emittance
            r44 = numpy.nan * numpy.ones((4, 4))
        elif emit3[1] < 1.E-13:  # Uncoupled machine
            minv = inv(r66[[0, 1, 4, 5], :][:, [0, 1, 4, 5]])
            r44 = numpy.zeros((4, 4))
            r44[:2, :2] = inv(minv[:2, :2])
        else:  # Coupled machine
            minv = inv(r66)
            r44 = inv(minv[:4, :4])
        # betatron emittances (dpp=0)
        emit2sq = numpy.array(
            [det(r44[s, s], check_finite=False) for s in _submat[:2]])
        # Prevent from unrealistic negative values of the determinant
        emit2 = numpy.sqrt(numpy.maximum(emit2sq, 0.0))
        return r44, emit2, emit3

    def propag(m, cumb, orbit6):
        """Propagate the beam matrix to refpts"""
        sigmatrix = m.dot(rr).dot(m.T) + cumb
        m44, emit2, emit3 = process(sigmatrix)
        return sigmatrix, m44, m, orbit6, emit2, emit3

    nelems = len(ring)
    uint32refs = uint32_refpts(refpts, nelems)
    bbcum, orbs = _dmatr(ring, orbit=orbit, keep_lattice=keep_lattice)
    mring, ms = find_m66(ring, uint32refs, orbit=orbs[0], keep_lattice=True)
    # ------------------------------------------------------------------------
    # Equation for the moment matrix R is
    #         R = MRING*R*MRING' + BCUM;
    # We rewrite it in the form of Lyapunov-Sylvester equation to use scipy's
    # solve_sylvester function
    #            A*R + R*B = Q
    # where
    #               A =  inv(MRING)
    #               B = -MRING'
    #               Q = inv(MRING)*BCUM
    # ------------------------------------------------------------------------
    aa = inv(mring)
    bb = -mring.T
    qq = numpy.dot(aa, bbcum[-1])
    rr = solve_sylvester(aa, bb, qq)
    rr = 0.5 * (rr + rr.T)
    rr4, emitxy, emitxyz = process(rr)
    r66data = get_tunes_damp(mring, rr)

    data0 = numpy.rec.fromarrays((rr, rr4, mring, orbs[0], emitxy, emitxyz),
                                 dtype=ENVELOPE_DTYPE)
    if uint32refs.shape == (0, ):
        data = numpy.recarray((0, ), dtype=ENVELOPE_DTYPE)
    else:
        data = numpy.rec.fromrecords(list(
            map(propag, ms, bbcum[uint32refs], orbs[uint32refs, :])),
                                     dtype=ENVELOPE_DTYPE)

    return data0, r66data, data
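ENVELOPE_DTYPE is defined elsewhere in the package. A plausible stand-in (an assumption) matching the fields listed in the docstring, showing the two access styles mentioned there:

    import numpy

    ENVELOPE_DTYPE = [('r66', numpy.float64, (6, 6)), ('r44', numpy.float64, (4, 4)),
                      ('m66', numpy.float64, (6, 6)), ('orbit6', numpy.float64, (6,)),
                      ('emitXY', numpy.float64, (2,)), ('emitXYZ', numpy.float64, (3,))]

    emit = numpy.recarray((3,), dtype=ENVELOPE_DTYPE)
    emit[:] = 0                                  # zero-fill every field
    assert emit['r66'].shape == (3, 6, 6)
    assert numpy.all(emit.r66 == emit['r66'])    # attribute and key access see the same data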
Example #33
def test(*, l=False):
    """Test dnb.reduce_precision and hdf5plugin.Bitshuffle."""

    from math import sqrt
    from time import perf_counter

    # Parameters.
    nchan = 16  # Number of channels correlated
    nsamples = 100  # Number of samples integrated, delta_f*delta_t
    Tsys = 50  # System temperature
    f = 0.01  # Precision reduction parameter
    nfreq = 5  # Added dimensionality, spectral frequencies.
    ntime = 1000  # Added dimensionality, temporal integrations.

    # Made up channel-dependent gain.
    gain_chan = numpy.arange(nchan) + nchan
    # Made up frequency-dependent gain.
    bandpass = (numpy.arange(nfreq) + nfreq)**2

    # Generate mock data. Model is pure uncorrelated receiver noise.
    # Auto correlations are a number, everything else is noise.
    nprod = (nchan * (nchan + 1)) // 2
    vis = numpy.recarray((nfreq, nprod, ntime), DTYPE)
    chan_a = numpy.empty(nprod, numpy.int64)
    chan_b = numpy.empty(nprod, numpy.int64)

    for ff in range(nfreq):
        kk = 0
        for ii in range(nchan):
            for jj in range(ii, nchan):
                chan_a[kk] = ii
                chan_b[kk] = jj

                amp = Tsys * gain_chan[ii] * gain_chan[jj] * bandpass[ff]
                if (ii == jj):
                    vis[ff, kk].r = numpy.round(
                        amp *
                        abs(1.0 + numpy.random.randn(ntime) / sqrt(nsamples)))
                    vis[ff, kk].i = 0.0
                else:
                    vis[ff, kk].r = numpy.round(
                        amp * numpy.random.randn(ntime) / sqrt(2 * nsamples))
                    vis[ff, kk].i = numpy.round(
                        amp * numpy.random.randn(ntime) / sqrt(2 * nsamples))
                kk += 1

    # Reduce precision.
    t0 = perf_counter()
    vis_rounded = reduce_precision(vis, nchan, chan_a, chan_b, f / nsamples)
    t = perf_counter() - t0

    rate = nfreq * nprod * ntime * DTYPE.itemsize / t
    print("Throughput(reduce_precision): %f MiB/s" % (rate / 1024**2))

    # Compress.
    with h5py.File('test_int32.h5', 'w') as f:
        t0 = perf_counter()
        f.create_dataset('mock_data',
                         data=vis_rounded,
                         **hdf5plugin.Bitshuffle())
        t = perf_counter() - t0

    rate = nfreq * nprod * ntime * DTYPE.itemsize / t
    print("Throughput(bitshuffle_compress): %f MiB/s" % (rate / 1024**2))

    # Decompress.
    with h5py.File('test_int32.h5', 'r') as f:
        t0 = perf_counter()
        vis_decompressed = f['mock_data'][...]
        t = perf_counter() - t0

    rate = nfreq * nprod * ntime * DTYPE.itemsize / t
    print("Throughput(bitshuffle_decompress): %f MiB/s" % (rate / 1024**2))

    if numpy.any(vis_rounded != vis_decompressed):
        raise ValueError('Data changed after I/O.')

    # Calculate compression rate.
    import os
    rate = os.path.getsize('test_int32.h5') / (nfreq * nprod * ntime *
                                               DTYPE.itemsize)
    print('Compression rate: %f %%' % (100 * rate))

    rounding_error = (vis_rounded.r - vis.r).astype(numpy.int64)
    if l:
        print("Rounding bias:")
        print(numpy.mean(rounding_error, -1))
        print("Rounding RMS:")
        print(numpy.sqrt(numpy.mean(rounding_error**2, -1)))
        print("Relative to thermal noise:")
        print(numpy.mean(rounding_error**2, -1) / numpy.var(vis.r, -1))
Example #34
t = np.arange(200)
dt = t[1] - t[0]
if type == 'B':
    x1 = 1
    x2 = 0
elif type == '':
    x1 = 0
    x2 = 1

rec = np.recarray(1,
                  dtype=[
                      ('Q', 'f8', len(pmts)),
                      ('T', 'f8', len(pmts)),
                      ('St', 'f8', len(pmts)),
                      ('mu', 'f8', 1),
                      ('N', 'f8', 1),
                      ('F', 'f8', 1),
                      ('Tf', 'f8', 1),
                      ('Ts', 'f8', 1),
                      ('R', 'f8', 1),
                      ('a', 'f8', 1),
                      ('eta', 'f8', 1),
                  ])

Rec = np.recarray(5000,
                  dtype=[
                      ('Q', 'f8', len(pmts)),
                      ('T', 'f8', len(pmts)),
                      ('St', 'f8', len(pmts)),
                      ('mu', 'f8', 1),
                      ('N', 'f8', 1),
                      ('F', 'f8', 1),
Example #35
    def calc_LUT(self, use_common=True):
        """Calculate the Look-up table

        :return: look up table either in CSR or LUT format depending on self.method
        """
        if self.pos is None:
            self.calc_pos()

        if self.max_size is None and not use_common:
            self.calc_size()
        if self.lut is None:
            with self._sem:
                if self.lut is None:
                    mask = self.mask
                    if _distortion:
                        if use_common:
                            self.lut = _distortion.calc_sparse(
                                self.pos,
                                self._shape_out,
                                max_pixel_size=(self.delta1, self.delta2),
                                format=self.method)
                        else:
                            if self.method == "lut":
                                self.lut = _distortion.calc_LUT(
                                    self.pos,
                                    self._shape_out,
                                    self.bin_size,
                                    max_pixel_size=(self.delta1, self.delta2))
                            else:
                                self.lut = _distortion.calc_CSR(
                                    self.pos,
                                    self._shape_out,
                                    self.bin_size,
                                    max_pixel_size=(self.delta1, self.delta2))
                    else:
                        lut = numpy.recarray(shape=(self._shape_out[0],
                                                    self._shape_out[1],
                                                    self.max_size),
                                             dtype=[("idx", numpy.uint32),
                                                    ("coef", numpy.float32)])
                        lut[:, :, :].idx = 0
                        lut[:, :, :].coef = 0.0
                        outMax = numpy.zeros(self._shape_out,
                                             dtype=numpy.uint32)
                        idx = 0
                        buffer_ = numpy.empty((self.delta1, self.delta2))
                        quad = Quad(buffer_)
                        for i in range(self._shape_out[0]):
                            for j in range(self._shape_out[1]):
                                if (mask is not None) and mask[i, j]:
                                    continue
                                # i,j, idx are indexes of the raw image uncorrected
                                quad.reinit(*list(self.pos[i,
                                                           j, :, :].ravel()))
                                # print(self.pos[i, j, 0, :], self.pos[i, j, 1, :], self.pos[i, j, 2, :], self.pos[i, j, 3, :]
                                try:
                                    quad.populate_box()
                                except Exception as error:
                                    print(
                                        "error in quad.populate_box of pixel %i, %i: %s"
                                        % (i, j, error))
                                    print("calc_area_vectorial",
                                          quad.calc_area_vectorial())
                                    print(self.pos[i, j, 0, :], self.pos[i, j,
                                                                         1, :],
                                          self.pos[i, j, 2, :], self.pos[i, j,
                                                                         3, :])
                                    print(quad)
                                    raise
                #                box = quad.get_box()
                                for ms in range(quad.get_box_size0()):
                                    ml = ms + quad.get_offset0()
                                    if ml < 0 or ml >= self._shape_out[0]:
                                        continue
                                    for ns in range(quad.get_box_size1()):
                                        # ms,ns are indexes of the corrected image in short form, ml & nl are the same
                                        nl = ns + quad.get_offset1()
                                        if nl < 0 or nl >= self._shape_out[1]:
                                            continue
                                        val = quad.get_box(ms, ns)
                                        if val <= 0:
                                            continue
                                        k = outMax[ml, nl]
                                        lut[ml, nl, k].idx = idx
                                        lut[ml, nl, k].coef = val
                                        outMax[ml, nl] = k + 1
                                idx += 1
                        lut.shape = (self._shape_out[0] *
                                     self._shape_out[1]), self.max_size
                        self.lut = lut
        return self.lut
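The pure-Python fallback above allocates a 3-D recarray of (idx, coef) pairs per output pixel, zero-fills both fields, and finally flattens the two image axes. That allocation pattern in miniature, with toy sizes:

    import numpy

    shape_out = (4, 5)
    max_size = 3
    lut = numpy.recarray(shape=shape_out + (max_size,),
                         dtype=[("idx", numpy.uint32), ("coef", numpy.float32)])
    lut[:, :, :].idx = 0
    lut[:, :, :].coef = 0.0

    # collapse the two image axes once the table is filled
    lut.shape = (shape_out[0] * shape_out[1]), max_size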
Example #36
import os
import sys
import numpy as np
from scipy.stats import poisson, binom
from scipy.special import erf as erf
from minimize import minimize
import multiprocessing

pmts = [0, 1, 4, 7, 8, 14]

Rec = np.recarray(1,
                  dtype=[
                      ('Q', 'f8', len(pmts)),
                      ('T', 'f8', len(pmts)),
                      ('St', 'f8', len(pmts)),
                      ('Sa', 'f8', len(pmts)),
                      ('mu', 'f8', 1),
                      ('W', 'f8', 1),
                      ('F', 'f8', 1),
                      ('Tf', 'f8', 1),
                      ('Ts', 'f8', 1),
                      ('R', 'f8', 1),
                      ('a', 'f8', 1),
                  ])

Rec[0] = ([
    0.28609523, 0.21198892, 0.1661045, 0.23595573, 0.2543458, 0.46767996
], [
    42.43727439, 42.48680044, 42.48223214, 42.61715417, 42.97131299,
    42.35603571
], [1.14722701, 0.82496347, 0.71858647, 1.61434698, 1.48554624, 1.03053529], [
    1.14722701, 0.82496347, 0.71858647, 1.61434698, 1.48554624, 1.03053529
], 2.57341188, 13.7, 0.11035399, 0.94339727, 34.3602973, 0.5760872, 0.36124252)
Example #37
def burstensemble(
    base,
    x_0,
    z,
    r1,
    r2,
    r3,
    mass,
    radius,
    bstart,
    pflux,
    numburstsobs,
):
    minmdot = 0.0
    maxmdot = 1.0
    mdot_res = 1e-6
    sbt = bstart
    salpha = []
    stime = []
    smdot = []
    se_b = []
    for i in range(0, numburstsobs):

        mdot = (0.67 / 8.8) * pflux[i] * r1
        tmp = settle(base, z, x_0, mdot, 1.0, mass, radius)

        mdot_hist = [mdot]
        while abs(mdot - mdot_hist[-1]) > mdot_res / 2.0 and (
                mdot > minmdot and mdot < maxmdot):
            mdot_hist.append(mdot)

        res = np.recarray((1, ),
                          dtype=[("tdel", np.float64), ("e_b", np.float64),
                                 ("alpha", np.float64), ("mdot", np.float64)])
        # assign elements
        res.tdel = tmp.tdel / 24.0
        res.e_b = tmp.E_b * 0.8  # multiply e_b by 0.8 to account for incomplete burning of fuel, as in Goodwin et al. (2018).
        alpha = tmp.alpha
        alpha = alpha[0]
        res.mdot = mdot
        _e_b = res.e_b
        _e_b = _e_b[0]
        se_b.append(_e_b)
        _mdot = res.mdot
        _mdot = _mdot[0]
        salpha.append(alpha)
        smdot.append(_mdot)
        stime.append(bstart[i])
        mdot_max = max(smdot)

    result = dict()

    result["base"] = [base]
    result["z"] = [z]
    result["x_0"] = [x_0]
    result["r1"] = [r1]
    result["r2"] = [r2]
    result["r3"] = [r3]
    result["mdot"] = smdot
    result["mdot_max"] = [mdot_max]
    result["time"] = stime
    result["alpha"] = salpha
    result["e_b"] = se_b

    result["mass"] = [mass]
    result["radius"] = [radius]

    print('ensemble')
    print(f"In burstrain fluence is {se_b}")

    return result
Example #38
0
def next_burst(base,
               z,
               x_0,
               t1,
               tobs,
               a,
               b,
               r1,
               cfac,
               mass,
               radius,
               direction=1,
               debug=False):
    """
    Routine to find the next burst in the series and return its properties
    Adapted from sim_burst.pro
    """

    mdot_res = 1e-6
    fn = "next_burst"
    assert direction in (1, -1)

    minmdot = 0.0
    maxmdot = 1.0

    # a, b passed as an array of an array
    a = a[0]
    b = b[0]

    # Determine the initial guess for mean mdot (linear)
    #  i0=min([n_elements(a)-1,max(where(t1 gt tobs))])
    itobs = np.where(t1 > tobs)[0]
    if (len(itobs) == 0) & (direction == -1):
        # the start time is before *any* of the observations; don't bother!
        return None
    if len(itobs) == 0:
        # this makes no sense to me; if the t1 value is < all the tobs values, then the
        # nearest element would be the zeroth
        # itobs = [-1]
        itobs = [0]
    # i0=max([0,min([len(a)-1,max([i for i, value in enumerate(tobs) if value < t1])])])
    i0 = max([0, min([len(a) - 1, max(itobs)])])
    mdot0 = ((0.67 / 8.8) * (a[i0] + b[i0] * t1) * r1)
    if debug:
        print("{}: z={}, X_0={}, r1={}".format(fn, z, x_0, r1))

    # Calculate the burst properties for the trial mdot value
    trial = settle(base, z, x_0, mdot0, cfac, mass, radius)
    if debug:
        print(
            '{}: initial guess mdot0={} @ t1={}, tdel={}, direction={}'.format(
                fn, mdot0, t1, trial.tdel, direction))

    # Now update the mdot with the value averaged over the trial interval
    if direction == 1:
        mdot = (0.67 / 8.8) * mean_flux(t1, t1 + trial.tdel / 24.0, tobs, a,
                                        b) * r1
    else:
        mdot = (0.67 / 8.8) * mean_flux(t1 - trial.tdel / 24.0, t1, tobs, a,
                                        b) * r1

    # Now retain the entire history of this iteration, so we can check for loops

    mdot_hist = [mdot0]
    tdel_hist = [trial.tdel[0] / 24.]

    nreturn = 0
    while (abs(mdot - mdot_hist[-1]) > mdot_res / 2.0) \
        and (((t1 + trial.tdel / 24.0 < 2.*max(tobs)) & (direction == 1)) \
            or ((t1 - trial.tdel / 24.0 > min(tobs)-(max(tobs)-min(tobs))) & (direction == -1))) \
        and (mdot > minmdot and mdot < maxmdot):

        trial = settle(base, z, x_0, mdot[0], cfac, mass, radius)
        nreturn = nreturn + 1

        mdot_hist.append(mdot[0])
        tdel_hist.append(trial.tdel[0] / 24.)

        if direction == 1:
            mdot = (0.67 / 8.8) * mean_flux(t1, t1 + (trial.tdel / 24.0), tobs,
                                            a, b) * r1

        else:
            mdot = (0.67 / 8.8) * mean_flux(t1 - (trial.tdel / 24.0), t1, tobs,
                                            a, b) * r1

        # Break out of the loop here, if necessary
        if nreturn > 10:
            e = random.random()
            mdot = mdot_hist[-1] * (1.0 - e) + mdot * e
            # Perhaps you should try to reset this randomly every 10 steps? - dkg
            # Yes, otherwise every trial above 10 steps will be random
            nreturn = 0

    # save the final versions to the history arrays
    mdot_hist.append(mdot[0])
    tdel_hist.append(trial.tdel[0] / 24.)
    if debug:
        print('{}: mdot_hist={}'.format(fn, mdot_hist))

        # now produce a diagnostic plot with the debug flag
        # plt.plot(t1+np.array(tdel_hist), mdot_hist, '.', label='tdel history')
        for tdel in tdel_hist:
            plt.axvline(t1 + tdel, color='k', ls='--')
        # also calculate a bunch of values to compare with
        t_arr = np.arange(t1, max(tobs), step=0.1)
        m_arr = [0]
        t_arr2 = [t1]
        for t in t_arr[1:]:
            _mdot = (0.67 / 8.8) * mean_flux(t1, t, tobs, a, b) * r1
            _tmp = settle(base, z, x_0, _mdot, cfac, mass, radius)
            t_arr2.append(t1 + _tmp.tdel[0] / 24.)
            m_arr.append(_mdot)
        plt.plot(t_arr, np.array(t_arr2), '-', label='tdel')
        plt.plot(t_arr, t_arr, '-', label='1:1')
        plt.xlim((0, 1.1 * max(t1 + np.array(tdel_hist))))
        plt.ylim((0, 1.1 * max(t1 + np.array(tdel_hist))))
        # plt.plot(np.array(t_arr2), np.array(m_arr), '.')
        plt.legend()
        plt.show()
        breakpoint()

    # if mdot < minmdot or mdot > maxmdot:
    if abs(mdot - mdot_hist[-2]) > mdot_res / 2.0:
        return None

        # create array
    #print(f'{fn}: mdot={mdot}, tdel={trial.tdel}')
    result = np.recarray((1, ),
                         dtype=[("t2", np.float64), ("e_b", np.float64),
                                ("alpha", np.float64), ("mdot", np.float64)])
    # assign elements
    result.t2 = t1 + direction * trial.tdel / 24.0
    result.e_b = trial.E_b  # E_b as returned by settle (not scaled by the 0.8 incomplete-burning factor of Goodwin et al. 2018)
    result.alpha = trial.alpha
    # result.qnuc = tmp.Q_nuc
    # result.xbar = tmp.xbar
    result.mdot = mdot

    return result
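# Sketch (illustrative, not from the source): when the mdot iteration above
# fails to settle, it blends the previous and proposed values with a random
# weight to break two-point limit cycles; the same idea in isolation:
import random

def damped_update(previous, proposed):
    """Randomly weighted blend of the last accepted value and the new proposal."""
    e = random.random()
    return previous * (1.0 - e) + proposed * e

print(damped_update(1.0, 2.0))   # somewhere between 1.0 and 2.0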
Example #39
0
    def keypoints(self, image):
        """
        Calculates the keypoints of the image
        :param image: ndimage of 2D (or 3D if RGB)
        """
        self.reset_timer()
        with self._sem:
            total_size = 0
            keypoints = []
            descriptors = []
            assert image.shape[:2] == self.shape
            assert image.dtype == self.dtype
            t0 = time.time()

            if self.dtype == numpy.float32:
                if type(image) == pyopencl.array.Array:
                    evt = pyopencl.enqueue_copy(self.queue,
                                                self.buffers[0].data,
                                                image.data)
                else:
                    evt = pyopencl.enqueue_copy(self.queue,
                                                self.buffers[0].data, image)
                if self.profile: self.events.append(("copy H->D", evt))
            elif (len(image.shape) == 3) and (self.dtype
                                              == numpy.uint8) and (self.RGB):
                if type(image) == pyopencl.array.Array:
                    evt = pyopencl.enqueue_copy(self.queue,
                                                self.buffers["raw"].data,
                                                image.data)
                else:
                    evt = pyopencl.enqueue_copy(self.queue,
                                                self.buffers["raw"].data,
                                                image)
                if self.profile: self.events.append(("copy H->D", evt))
                #                print self.procsize[0], self.wgsize[0]
                evt = self.programs["preprocess"].rgb_to_float(
                    self.queue, self.procsize[0], self.wgsize[0],
                    self.buffers["raw"].data, self.buffers[0].data,
                    *self.scales[0])
                if self.profile: self.events.append(("RGB -> float", evt))

            elif self.dtype in self.converter:
                program = self.programs["preprocess"].__getattr__(
                    self.converter[self.dtype])
                evt = pyopencl.enqueue_copy(self.queue,
                                            self.buffers["raw"].data, image)
                if self.profile: self.events.append(("copy H->D", evt))
                evt = program(self.queue, self.procsize[0], self.wgsize[0],
                              self.buffers["raw"].data, self.buffers[0].data,
                              *self.scales[0])
                if self.profile: self.events.append(("convert -> float", evt))
            else:
                raise RuntimeError("invalid input format error")

            k1 = self.programs["reductions"].max_min_global_stage1(
                self.queue, (self.red_size * self.red_size, ),
                (self.red_size, ), self.buffers[0].data,
                self.buffers["max_min"].data,
                numpy.uint32(self.shape[0] * self.shape[1]))
            k2 = self.programs["reductions"].max_min_global_stage2(
                self.queue, (self.red_size, ), (self.red_size, ),
                self.buffers["max_min"].data, self.buffers["max"].data,
                self.buffers["min"].data)
            if self.profile:
                self.events.append(("max_min_stage1", k1))
                self.events.append(("max_min_stage2", k2))
            evt = self.programs["preprocess"].normalizes(
                self.queue, self.procsize[0], self.wgsize[0],
                self.buffers[0].data, self.buffers["min"].data,
                self.buffers["max"].data, self.buffers["255"].data,
                *self.scales[0])
            if self.profile: self.events.append(("normalize", evt))

            #            octSize = 1.0
            curSigma = 1.0 if par.DoubleImSize else 0.5
            octave = 0
            if self._initSigma > curSigma:
                logger.debug("Bluring image to achieve std: %f",
                             self._initSigma)
                sigma = math.sqrt(self._initSigma**2 - curSigma**2)
                self._gaussian_convolution(self.buffers[0], self.buffers[0],
                                           sigma, 0)
    #        else:
    #            pyopencl.enqueue_copy(self.queue, dest=self.buffers[(0, "G_1")].data, src=self.buffers["input"].data)

            for octave in range(self.octave_max):
                kp, descriptor = self._one_octave(octave)
                logger.info("in octave %i found %i kp" % (octave, kp.shape[0]))

                if kp.shape[0] > 0:
                    keypoints.append(kp)
                    descriptors.append(descriptor)
                    total_size += kp.shape[0]

            ########################################################################
            # Merge keypoints in central memory
            ########################################################################
            output = numpy.recarray(shape=(total_size, ), dtype=self.dtype_kp)
            last = 0
            for ds, desc in zip(keypoints, descriptors):
                l = ds.shape[0]
                if l > 0:
                    output[last:last + l].x = ds[:, 0]
                    output[last:last + l].y = ds[:, 1]
                    output[last:last + l].scale = ds[:, 2]
                    output[last:last + l].angle = ds[:, 3]
                    output[last:last + l].desc = desc
                    last += l
            logger.info("Execution time: %.3fms" % (1000 * (time.time() - t0)))

    #        self.count_kp(output)
        return output
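# Usage sketch (assumption, not from the source): keypoints() returns a
# recarray whose dtype_kp is assumed here to hold (x, y, scale, angle, desc);
# such an array can be filtered and sorted with plain numpy operations.
import numpy

dtype_kp = [('x', numpy.float32), ('y', numpy.float32),
            ('scale', numpy.float32), ('angle', numpy.float32),
            ('desc', numpy.uint8, 128)]
kp = numpy.recarray(5, dtype=dtype_kp)
kp.x = numpy.random.random(5)
kp.y = numpy.random.random(5)
kp.scale = numpy.random.random(5)
kp.angle = 0
kp.desc = 0
strong = kp[kp.scale > 0.5]        # boolean selection keeps the recarray type
order = numpy.argsort(kp.scale)    # indices from weakest to strongest scale
print(strong.size, kp[order[-1]].scale)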
Example #40
0
    def match(self, nkp1, nkp2, raw_results=False):
        """Calculate the matching of 2 keypoint list

        :param nkp1: numpy 1D recarray of keypoints or equivalent GPU buffer
        :param nkp2: numpy 1D recarray of keypoints or equivalent GPU buffer
        :param raw_results: if true return the 2D array of indexes of matching keypoints (not the actual keypoints)

        TODO: implement the ROI ...
        """
        assert len(
            nkp1.shape) == 1  # Nota: nkp1.ndim is not valid for gpu_arrays
        assert len(nkp2.shape) == 1
        valid_types = (numpy.ndarray, numpy.core.records.recarray,
                       pyopencl.array.Array)
        assert isinstance(nkp1, valid_types)
        assert isinstance(nkp2, valid_types)
        result = None
        with self.sem:
            if isinstance(nkp1, pyopencl.array.Array):

                kpt1_gpu = nkp1
            else:
                if nkp1.size > self.cl_mem["Kp_1"].size:
                    logger.warning(
                        "increasing size of keypoint vector 1 to %i" %
                        nkp1.size)
                    self.cl_mem["Kp_1"] = pyopencl.array.empty(
                        self.queue, (nkp1.size, ), dtype=self.dtype_kp)
                kpt1_gpu = self.cl_mem["Kp_1"]
                self._reset_buffer1()
                evt1 = pyopencl.enqueue_copy(self.queue, kpt1_gpu.data, nkp1)
                if self.profile:
                    self.events.append(("copy H->D KP_1", evt1))

            if isinstance(nkp2, pyopencl.array.Array):
                kpt2_gpu = nkp2
            else:
                if nkp2.size > self.cl_mem["Kp_2"].size:
                    logger.warning(
                        "increasing size of keypoint vector 2 to %i" %
                        nkp2.size)
                    self.cl_mem["Kp_2"] = pyopencl.array.empty(
                        self.queue, (nkp2.size, ), dtype=self.dtype_kp)
                kpt2_gpu = self.cl_mem["Kp_2"]
                self._reset_buffer2()
                evt2 = pyopencl.enqueue_copy(self.queue, kpt2_gpu.data, nkp2)
                if self.profile:
                    self.events.append(("copy H->D KP_2", evt2))

            if min(kpt1_gpu.size,
                   kpt2_gpu.size) > self.cl_mem["match"].shape[0]:
                self.kpsize = min(kpt1_gpu.size, kpt2_gpu.size)
                self.cl_mem["match"] = pyopencl.array.empty(self.queue,
                                                            (self.kpsize, 2),
                                                            dtype=numpy.int32)
            self._reset_output()
            wg = self.kernel_size["matching"]
            size = calc_size((nkp1.size, ), (wg, ))
            evt = self.kernels.matching(
                self.queue, size, (wg, ), kpt1_gpu.data, kpt2_gpu.data,
                self.cl_mem["match"].data, self.cl_mem["cnt"].data,
                numpy.int32(self.kpsize),
                numpy.float32(par.MatchRatio * par.MatchRatio),
                numpy.int32(nkp1.size), numpy.int32(nkp2.size))
            if self.profile:
                self.events.append(("matching", evt))
            size = self.cl_mem["cnt"].get()[0]
            match = numpy.empty(shape=(size, 2), dtype=numpy.int32)
            if size > 0:
                cpyD2H = pyopencl.enqueue_copy(self.queue, match,
                                               self.cl_mem["match"].data)
                if self.profile:
                    self.events.append(("copy D->H match", cpyD2H))
            if raw_results:
                result = match
            else:
                result = numpy.recarray(shape=(size, 2), dtype=self.dtype_kp)

                result[:, 0] = nkp1[match[:size, 0]]
                result[:, 1] = nkp2[match[:size, 1]]
        return result
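# Sketch (not from the source): given the (size, 2) index pairs returned in
# raw_results mode, matched keypoints can be paired on the host like this;
# the index values and tiny keypoint arrays are hypothetical.
import numpy

match = numpy.array([[0, 2], [3, 1]], dtype=numpy.int32)
nkp1 = numpy.recarray(4, dtype=[('x', 'f4'), ('y', 'f4')])
nkp2 = numpy.recarray(4, dtype=[('x', 'f4'), ('y', 'f4')])
nkp1.x = range(4)
nkp1.y = 0
nkp2.x = range(4)
nkp2.y = 1
pairs = numpy.recarray((match.shape[0], 2), dtype=nkp1.dtype)
pairs[:, 0] = nkp1[match[:, 0]]
pairs[:, 1] = nkp2[match[:, 1]]
print(pairs.x)   # x coordinates of each matched pair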
Example #41
0
def main():
    ###make sure to change these when running in a new environment!###
    #location of data directory
    filepath1 = cu.get_output_path() + 'processed_data/NYU_VAGC/'
    savepath1 = filepath1 + 'custom_catalogues/'
    filepath2 = cu.get_output_path() + 'processed_data/mpa_dr7/'
    savepath2 = filepath2 + 'custom_catalogues/'
    filepath3 = cu.get_output_path() + 'processed_data/berlind_groupcat/'
    savepath3 = filepath3 + 'custom_catalogues/'
    #################################################################

    cosmo = FlatLambdaCDM(H0=100,
                          Om0=0.3169)  #h=1, Omega_m=0.3169, Omega_Lambda=0.6831

    catalogue1 = 'nyu_vagc_dr7'
    catalogue2 = 'gal_info_gal_totspecsfr_dr7_v5_2'
    try:
        sys.argv[1]
    except IndexError:
        mass = '10.2'
    else:
        mass = sys.argv[1]
    catalogue3 = 'smthresh' + mass + '.groups'
    print('reading in', catalogue3, 'catalogue...')
    catalogue3_new = 'sample3_M_model.sm' + mass
    print('making', catalogue3_new, 'catalogue...')

    #open nyu vagc
    print(catalogue1)
    f1 = h5py.File(filepath1 + catalogue1 + '.hdf5', 'r')
    dset1 = f1.get(catalogue1)
    match13 = np.load(filepath1 + 'berlind_groupcat_match/' + catalogue3 +
                      '_' + catalogue1 + '_match.npy')

    #open mpa
    print(catalogue2)
    f2 = h5py.File(filepath2 + catalogue2 + '.hdf5', 'r')
    dset2 = f2.get(catalogue2)
    match23 = np.load(filepath2 + 'berlind_groupcat_match/' + catalogue3 +
                      '_' + catalogue2 + '_match.npy')

    #open groupcat
    print(catalogue3)
    f3 = h5py.File(filepath3 + catalogue3 + '.hdf5', 'r')
    dset3 = f3.get(catalogue3)
    match31 = np.load(filepath3 + 'nyu_vagc_match/' + catalogue1 + '_' +
                      catalogue3 + '_match.npy')
    match32 = np.load(filepath3 + 'mpa_dr7_match/' + catalogue2 + '_' +
                      catalogue3 + '_match.npy')

    #if you want to know the column names of the data sets...
    #print dset1.dtype.descr
    #print ' '
    #print dset2.dtype.descr
    #print ' '
    #print dset3.dtype.descr

    #here is the data model for the new group catalogue
    dtype=[('ID','>i8'),('RA','>f8'),('DEC','>f8'),\
           ('Z','>f8'),('Z_ERR','>f8'),('Z_TYPE','>i8'),('VELDISP','>f8'),('VELDISP_ERR','>f8'),('FIBERCOL','>i8'),\
           ('M_u,0.1','>f8'),('M_g,0.1','>f8'),('M_r,0.1','>f8'),('M_i,0.1','>f8'),('M_z,0.1','>f8'),\
           ('N_SERSIC','>f8'),\
           ('MSTAR','>f8'),('SSFR','>f8'),\
           ('GROUP_ID','>i8'),('MGROUP','>f8'),('ZGROUP','>f8'),('R200','>f8'),('RPROJ','>f8'),('CEN_IND','>i8')]
    dtype = np.dtype(dtype)

    #define fiber collision galaxies
    result_GC = np.where(dset1['SDSS_SPECTRO_TAG'][match31] == -1)[
        0]  #where in the group catalogue are the collisions
    collision = np.zeros(len(match31), dtype=int)
    collision[result_GC] = 1  #this flag==1 if this is a collision galaxy

    #create array to store catalogue in
    data = np.recarray((len(dset3), ), dtype=dtype)
    data.fill(-99.9)  #if no value is available, set = -99.9

    #create gal ID's
    ID = np.arange(0, len(data), 1).astype(int)

    #input basics
    data['ID'] = ID
    data['RA'] = dset3['ra']
    data['DEC'] = dset3['dec']
    data['Z'][match13] = dset1['Z'][
        match31]  #redshift if available from anywhere
    data['Z_ERR'][match13] = dset1['Z_ERR'][match31]  #redshift err from sdss
    data['Z_TYPE'][match13] = dset1['ZTYPE'][match31]  #source of redshift
    data['VELDISP'][match13] = dset1['VDISP'][match31]
    data['VELDISP_ERR'][match13] = dset1['VDISP_ERR'][match31]
    data['FIBERCOL'][match13] = collision

    #do K+E corrected ABS magnitude
    AQ = [-4.22, -2.04, -1.62, -1.61, -0.76]
    EQ = AQ[0] * (data['Z'] - 0.1)
    x = dset1['ABSMAG_u.nearest.model.z0.10'][match31] - EQ
    data['M_u,0.1'] = x
    EQ = AQ[1] * (data['Z'] - 0.1)
    x = dset1['ABSMAG_g.nearest.model.z0.10'][match31] - EQ
    data['M_g,0.1'] = x
    EQ = AQ[2] * (data['Z'] - 0.1)
    x = dset1['ABSMAG_r.nearest.model.z0.10'][match31] - EQ
    data['M_r,0.1'] = x
    EQ = AQ[3] * (data['Z'] - 0.1)
    x = dset1['ABSMAG_i.nearest.model.z0.10'][match31] - EQ
    data['M_i,0.1'] = x
    EQ = AQ[4] * (data['Z'] - 0.1)
    x = dset1['ABSMAG_z.nearest.model.z0.10'][match31] - EQ
    data['M_z,0.1'] = x

    #apply some derived quantities
    data['N_SERSIC'] = dset1['SERSIC_N_r'][match31]
    data['MSTAR'] = dset3['Mstar']  #take from Berlind
    data['SSFR'][match23] = dset2['MEDIAN'][match32]

    #add some group properties
    data['GROUP_ID'] = dset3['groupID']
    data['MGROUP'] = np.log10(dset3['Mgroup'])

    #identify central galaxy in groups
    group_IDs = np.unique(data['GROUP_ID'])
    for group in group_IDs:  #run through each group and identify central galaxy
        members = np.where(data['GROUP_ID'] == group)[0]
        largest_mass = np.max(data['MSTAR'][members])  #central is most massive
        central = np.where((data['GROUP_ID'] == group)
                           & (data['MSTAR'] == largest_mass))[0]
        central = central[
            0]  #should only be one of these, so make it the central
        data['CEN_IND'][members] = central
        data['ZGROUP'][members] = data['Z'][central]
        da = cu.spheredist(data['RA'][central], data['DEC'][central],
                           data['RA'][members], data['DEC'][members])
        chi = cosmology.funcs.comoving_distance(data['ZGROUP'][central],
                                                cosmo=cosmo) * 1000.0  #in kpc
        dl = cosmology.funcs.luminosity_distance(data['ZGROUP'][central],
                                                 cosmo=cosmo) * 1000.0  #in kpc
        data['RPROJ'][members] = chi / (
            1.0 + data['ZGROUP'][members]) * da  #calculate physical separation
    Omega_m = 0.3169
    x = 258.1 * (10**data['MGROUP'] / (10.0**12.0))**(1.0 / 3.0) * (
        Omega_m / 0.25)**(1.0 / 3.0) * (1.0 + data['ZGROUP'])**(-1.0)
    data['R200'] = x

    print('saving hdf5 version of the catalogue...')
    filename = catalogue3_new
    f = h5py.File(savepath3 + filename + '.hdf5', 'w')
    dset = f.create_dataset(filename, data=data)
    f.close()

    print('saving ascii version of the catalogue...')
    filename = catalogue3_new
    data_table = table.table.Table(data=data)
    ascii.write(data_table, savepath3 + filename + '.dat')
    print(data_table)
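# Sketch (illustrative only) of the evolution ("+E") correction applied to the
# absolute magnitudes above: M_corrected = M - AQ_band * (z - 0.1), with
# AQ_band the per-band evolution coefficient and z the galaxy redshift.
import numpy as np

def e_correct(absmag, z, AQ_band):
    """Apply the evolution correction used above for a single band."""
    return np.asarray(absmag) - AQ_band * (np.asarray(z) - 0.1)

print(e_correct([-20.5, -21.0], [0.08, 0.12], -1.62))   # r-band coefficient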
Example #42
0
def do_clustering(
    alldecks,
    prefix="MTGTOP8",
    deck_guess=KLD_deck_guess,
    start_date=datetime.date(year=2016, month=10, day=1),
    timestep=7,
    standard_set_savename='data/standard_legal.json',
    forced_legal=[],
    card_to_check=None,
):

    standard_legal = [
        x.lower() for x in get_standard_legal(savename=standard_set_savename,
                                              forced_legal=forced_legal)
    ]
    if card_to_check is not None:
        assert card_to_check in standard_legal

    card_namespace = set(card.lower() for dailies in alldecks.values()
                         for deck in dailies.values()
                         for card in deck['mainboard']
                         if card.lower() in standard_legal)

    lowercase_decks = {
        date: {
            key: {
                board:
                ({(card.lower() if '/' not in card else
                   card.lower().split("/")[0].strip()): deck[board][card]
                  for card in deck[board]}
                 if board not in ('eventid', 'record') else deck[board])
                for board in deck
            }
            for key, deck in results.items()
        }
        for date, results in alldecks.items()
    }

    legaldecks = {
        date: results
        for date, results in lowercase_decks.items()
        if not any(card.lower() not in standard_legal
                   for deck in results.values() for card in deck['mainboard'])
    }
    illegalcards = {
        date: {
            user: ([
                card for card in deck['mainboard']
                if card.lower() not in standard_legal
            ], deck['eventid'])
            for user, deck in results.items() if any([
                card for card in deck['mainboard']
                if card.lower() not in standard_legal
            ])
        }
        for date, results in lowercase_decks.items()
        if any(card.lower() not in standard_legal for deck in results.values()
               for card in deck['mainboard'])
    }

    deckcount = sum(len(x) for x in legaldecks.values())
    print("Found {0} decks".format(deckcount))

    array = np.recarray(deckcount,
                        dtype=([('ID', 'S40'), ('Date', 'S10'),
                                ('EventID', int), ("Archetype", "S25")] +
                               [(name, np.int16) for name in card_namespace]))

    ii = 0
    for date, daily in legaldecks.items():
        for deckname, deck in daily.items():
            array['ID'][ii] = deckname.encode('ascii', errors='replace')
            array['Date'][ii] = date
            array['EventID'][ii] = deck['eventid']

            for card in card_namespace:
                if card in deck['mainboard']:
                    array[card][ii] = int(deck['mainboard'][card]) + 10
                else:
                    array[card][ii] = 0

            ii = ii + 1

    pd = pandas.DataFrame(array)

    # drop the ID, Date, EventID and Archetype columns, keeping only the card counts
    justdata = pd.T[4:]

    # import skfuzzy as fuzz
    # # Set up the loop and plot
    # #fig1, axes1 = pl.subplots(3, 3, figsize=(8, 8))
    # fpcs = []
    #
    #
    # for ncenters in range(2,20):
    #     cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
    #         data=justdata, c=ncenters, m=2, error=0.005, maxiter=1000, init=None)
    #
    #     # Store fpc values for later
    #     fpcs.append(fpc)
    #
    # #    # Plot assigned clusters, for each data point in training set
    #     cluster_membership = np.argmax(u, axis=0)
    #
    #     print()
    #     print("Nclusters = {0}".format(ncenters))
    #     print()
    #
    #     for ii in range(ncenters):
    #         mask = cluster_membership==ii
    #         deck = (justdata.T[mask] > 0).sum(axis=0)
    #         deck.sort_values(inplace=True)
    #         if any(k in deck.keys()[-10:] for k in easy_decks):
    #             for k in easy_decks:
    #                 if k in deck.keys()[-10:]:
    #                     name = k
    #             print("Deck {0}={2}: {1} matches, {3}%".format(ii, mask.sum(), name, mask.sum()/len(mask)))
    #         else:
    #             print("Deck {0}: {1} matches, {2}%".format(ii, mask.sum(), mask.sum()/len(mask)))
    #         print(deck[-10:])
    # #    for j in range(ncenters):
    # #        ax.plot(xpts[cluster_membership == j],
    # #                ypts[cluster_membership == j], '.', color=colors[j])
    # #
    # #    # Mark the center of each fuzzy cluster
    # #    for pt in cntr:
    # #        ax.plot(pt[0], pt[1], 'rs')
    # #
    # #    ax.set_title('Centers = {0}; FPC = {1:.2f}'.format(ncenters, fpc))
    # #    ax.axis('off')
    # #
    # #fig1.tight_layout()
    #
    # fig2, ax2 = pl.subplots()
    # ax2.plot(range(2,20), fpcs)
    # ax2.set_xlabel("Number of centers")
    # ax2.set_ylabel("Fuzzy partition coefficient")

    distortions = []

    deck_class = {
        'Panharmonicon': ['Panharmonicon'],
        'Metalwork Colossus': ['Metalwork Colossus'],
        'Aetherworks Marvel': ['Aetherworks Marvel'],
        #'BG Delirium Aggro': ['Grim Flayer', ],
        'BG Delirium Control': [
            "Liliana, the Last Hope", 'Grim Flayer', 'Ishkanah, Grafwidow',
            'Grasp of Darkness', 'Vessel of Nascency', 'Noxious Gearhulk',
            'Ruinous Path'
        ],
        'UW Flash': [
            'Reflector Mage', "Smuggler's Copter", "Thraben Inspector",
            "Prairie Stream"
        ],
        'Bux Graveyard':
        ['Haunted Dead', 'Prized Amalgam', 'Voldaren Pariah', 'Cryptbreaker'],
        'RW Vehicle Aggro': [
            "Smuggler's Copter", "Inspiring Vantage", "Pia Nalaar",
            "Toolcraft Exemplar", "Thraben Inspector"
        ],
        'Mardu Vehicle Aggro': [
            "Concealed Courtyard", "Scrapheap Scrounger", "Smuggler's Copter",
            "Inspiring Vantage", "Toolcraft Exemplar", "Thraben Inspector"
        ],
        'Wx Humans': [
            "Thalia's Lieutenant", "Thraben Inspector", "Town Gossipmonger",
            "Expedition Envoy", "Always Watching"
        ],  # has a dwarf?!
        'RG Energy Aggro': [
            'Servant of the Conduit', 'Attune with Aether', 'Longtusk Cub',
            'Voltaic Brawler', 'Bristling Hydra'
        ],
        'Grixis Graveyard Emerge': [
            'Elder Deep-Fiend', "Kozilek's Return", "Prized Amalgam",
            "Cathartic Reunion", "Haunted Dead", "Wretched Gryff"
        ],
        'RG Pummeler': [
            "Electrostatic Pummeler", 'Servant of the Conduit',
            'Attune with Aether', "Blossoming Defense", "Built to Smash"
        ],
        'RB Aggro':
        ['Fiery Temper', 'Bomat Courier', 'Unlicensed Disintegration'],
        'UR Control': [
            'Torrential Gearhulk', 'Glimmer of Genius', 'Harnessed Lightning',
            'Spirebluff Canal', 'Wandering Fumarole'
        ],
        'UW Control': [
            'Torrential Gearhulk', 'Glimmer of Genius', 'Immolating Glare',
            'Blessed Alliance'
        ],
        'UB Control': [
            'Torrential Gearhulk', 'Glimmer of Genius', 'Grasp of Darkness',
            'Liliana, the Last Hope', 'Sunken Hollow'
        ],
    }

    easy_decks = ('Panharmonicon', 'Metalwork Colossus', 'Aetherworks Marvel')

    # +1 for the "others"
    guess_array = np.zeros([len(deck_guess) + 1, justdata.shape[0]])
    for ii, (deckname, deck) in enumerate(deck_guess.items()):
        for card in deck:
            if card.lower() not in pd.columns:
                print(
                    "Deck {0} is not represented in the meta".format(deckname))
                #raise ValueError("Card {0} is not real".format(card))
            guess_array[ii,
                        justdata.T.keys() == card.lower()] = deck[card] + 10

    codebook, distortion = scipy.cluster.vq.kmeans(
        np.array(justdata.T, 'float'), guess_array)
    code, dist = scipy.cluster.vq.vq(np.array(justdata.T, 'int'), codebook)
    #cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
    #    data=justdata, c=ncenters, m=2, error=0.005, maxiter=1000, init=None)

    pd['Distortion'] = dist

    # Store fpc values for later
    distortions.append(distortion)

    cluster_membership = code

    deck_50_pct = {}
    deck_counts = {}
    deck_top20s = {}
    deck_ids = {}

    def top20_match(x):
        x = list(x)
        if len(x) == 0:
            return 0
        return np.sum(x) / len(x)

    for ii in range(len(deck_guess) + 1):
        mask = cluster_membership == ii
        deck = (justdata.T[mask] > 0).sum(axis=0)
        deck.sort_values(inplace=True)

        deck_top20s[ii] = deck

        name = None
        card_match_fraction = {
            dk:
            top20_match(k.lower() in
                        [this_key.lower() for this_key in deck.keys()[-20:]]
                        for k in ks)
            for dk, ks in deck_guess.items()
        }
        bestfrac = 0
        for dk, frac in card_match_fraction.items():
            if frac > bestfrac:
                bestfrac = frac
                name = dk
        if bestfrac < 0.7:
            print("### Deck {0} has bad matches {1}".format(ii, bestfrac))
            name = None
        if name is None:
            print("Deck {0}: {1} matches, {2:0.2f}% of total".format(
                ii, mask.sum(),
                mask.sum() / len(mask) * 100))
            deck_ids[ii] = ii
            deck_50_pct[ii] = mask.sum() / len(mask)
            deck_counts[ii] = mask.sum()
            pd.ix[mask, 'Archetype'] = "Other " + str(ii)
        else:
            if name in deck_50_pct:
                print("**********DUPLICATE**********")
                pd.loc[mask, 'Archetype'] = name + str(ii)
            else:
                pd.loc[mask, 'Archetype'] = name
            deck_50_pct[name] = mask.sum() / len(mask)
            deck_counts[name] = mask.sum()
            print("Deck {0}={2}: {1} matches, {3:0.2f}% of total".format(
                ii, mask.sum(), name,
                mask.sum() / len(mask) * 100))
            deck_ids[name] = ii
            #pd['Archetype'][mask] = name
        #print(deck[-20:])

    print(len(deck_50_pct), deck_50_pct)

    final = pandas.DataFrame.from_dict([
        (name, deck_50_pct[name], deck_counts[name]) for name in deck_50_pct
    ])
    final.sort_values(by=1, inplace=True)
    print(final.sort_values(by=1))

    week_starts = [
        day
        for day in daterange(start_date, datetime.date.today(), step=timestep)
    ]

    weekly_summary = pandas.DataFrame(index=week_starts,
                                      columns=deck_guess.keys())

    dates = pandas.to_datetime([x.decode() for x in pd.Date])
    dates = np.array([
        datetime.date(year=2000 + int(x[6:8]),
                      month=int(x[3:5]),
                      day=int(x[0:2])) for x in pd.Date
    ])

    for week_start in week_starts:
        week_end = week_start + datetime.timedelta(timestep)
        date_matches = (dates >= week_start) & (dates < week_end)
        for deck in deck_guess:
            deck_matches = (pd.Archetype == deck) & date_matches
            weekly_summary[deck][week_start] = deck_matches.sum(
            ) / date_matches.sum()

    weekly_summary.plot(style=[
        x + 'o' + y for x, y in zip('rgbcmykrgbcmykrgbcmykrgbcmyk', ['-'] * 7 +
                                    ['--'] * 7 + [':'] * 7 + ['-.'] * 7)
    ],
                        figsize=[24, 20])
    pl.xlabel("First date in week")
    pl.ylabel("Fraction decks in that week")
    pl.legend(loc='center left', bbox_to_anchor=(1.05, 0.5))
    pl.savefig("{prefix}_meta.png".format(prefix=prefix), bbox_inches='tight')

    def get_deck(num):
        deck = pd.loc[num]
        return deck[deck != 0]

    return pd, get_deck, deck_50_pct, deck_ids, deck_top20s, deck_counts
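# Sketch (not from the source): the seeded k-means / vector-quantisation
# pattern used above, on toy data; each row of obs is one deck vector and
# each row of guess is an initial archetype centroid.
import numpy as np
import scipy.cluster.vq

obs = np.array([[10., 0.], [12., 1.], [0., 11.], [1., 9.]])
guess = np.array([[10., 0.], [0., 10.]])
codebook, distortion = scipy.cluster.vq.kmeans(obs, guess)
code, dist = scipy.cluster.vq.vq(obs, codebook)
print(code)   # cluster index assigned to each observation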
Example #43
0
def _Combine60sDateSpecies(Date,
                           Species='H',
                           Verbose=True,
                           Overwrite=False,
                           DryRun=False):
    '''
    Combines the relevant files for a given species on a given date.

    Inputs
    ======
    Date : integer, format yyyymmdd
    Species : string, one of 'H', 'He', 'He2', 'O', 'Na'
    Verbose : bool, print progress information
    Overwrite : bool, overwrite an existing combined file
    DryRun : bool, build the output array but do not save it

    '''

    #use species to calculate some constants
    mass = Globals.Constants.amu * Globals.IonMass.get(Species,
                                                       Globals.IonMass['H'])
    e = Globals.Constants.e
    g = Globals.Constants.g
    kB = Globals.Constants.kB
    dOmega = Globals.Constants.dOmega
    eqbins0 = Globals.EQBins[0]
    eqbins2 = Globals.EQBins[2]
    if Species == 'He2':
        vbins0 = np.sqrt((2 * e * 2000.0 * eqbins0) / mass)
        vbins2 = np.sqrt((2 * e * 2000.0 * eqbins2) / mass)
    else:
        vbins0 = np.sqrt((e * 2000.0 * eqbins0) / mass)
        vbins2 = np.sqrt((e * 2000.0 * eqbins2) / mass)

    #get output dtype, file name and path
    OutPath = Globals.MessPath + 'FIPS/Combined/60s/{:s}/'.format(Species)
    if not os.path.isdir(OutPath):
        os.system('mkdir -pv ' + OutPath)
    dtype = Globals.dtype60s
    fname = OutPath + '{:08d}.bin'.format(Date)

    if os.path.isfile(fname) and not Overwrite and not DryRun:
        print("File {:s} exists".format(fname))
        return

    #read in the four data files (if they exist)
    dS = ReadData(Date, 'espec')
    dN = ReadData(Date, 'ntp')
    dE = ReadData(Date, 'edr')
    dC = ReadData(Date, 'cdr')
    if Species == 'H':
        dA = ReadData(Date, 'ann')
    else:
        dA = None

    #check that there are any data points:
    if dE.size == 0 and dC.size == 0 and dS.size == 0 and dN.size == 0:
        return  #no data found at all for this date

    #now we need to work out how many records there are - NTP values
    #don't exist for all data, so using that will cut out other spectra
    #might be a good idea to group up the CDR or EDR data
    StartMET = np.copy(dN.StartMET)
    StopMET = np.copy(dN.StopMET)
    StartInd = np.copy(dN.StartIndex)
    StopInd = np.copy(dN.StopIndex)
    nN = dN.size
    grouped = np.zeros(dS.size, dtype='bool')
    for i in range(0, nN):
        use = np.where((dS.Index >= StartInd[i]) & (dS.Index <= StopInd[i]))[0]
        grouped[use] = True

    #now to group up the rest
    notgrouped = grouped == False
    ng = np.where(notgrouped)[0]
    met = dS.MET[ng]
    ind = dS.Index[ng]

    if ng.size > 0:
        StM = []
        SpM = []
        StI = []
        SpI = []
        i = 0
        while i < ng.size:
            use = np.where((met >= met[i]) & (met <= met[i] + 60.0))[0]
            StM.append(met[use[0]])
            SpM.append(met[use[-1]])

            StI.append(ind[use[0]])
            SpI.append(ind[use[-1]])

            i = use[-1] + 1

        StartMET = np.append(StartMET, np.array(StM))
        StopMET = np.append(StopMET, np.array(SpM))
        StartInd = np.append(StartInd, np.array(StI))
        StopInd = np.append(StopInd, np.array(SpI))

        srt = np.argsort(StartMET)

        StartMET = StartMET[srt]
        StopMET = StopMET[srt]
        StartInd = StartInd[srt]
        StopInd = StopInd[srt]

    #now we should have grouped all of the data, time to create the output array
    n = np.size(StartMET)
    if n == 0:
        print('no data')
        return
    out = np.recarray(n, dtype=dtype)

    #save some ion info
    spstr = Species + (3 - (len(Species))) * ' '
    out.Ion = spstr
    out.Mass = mass

    #save ut and MET
    met0 = dC.MET[0] - dC.ut[0] * 3600.0  #MET at the start of the day
    out.Date = Date
    out.MET = StopMET
    out.ut = (out.MET - met0) / 3600.0
    out.StartIndex = StartInd
    out.StopIndex = StopInd

    #continuous ut
    out.utc = ContUT(out.Date, out.ut)

    #position
    pos = GetPosition(Date)
    if pos.size > 0:
        fx = interp1d(pos.ut,
                      pos.x,
                      kind='cubic',
                      bounds_error=False,
                      fill_value='extrapolate')
        fy = interp1d(pos.ut,
                      pos.y,
                      kind='cubic',
                      bounds_error=False,
                      fill_value='extrapolate')
        fz = interp1d(pos.ut,
                      pos.z,
                      kind='cubic',
                      bounds_error=False,
                      fill_value='extrapolate')
        out.x = fx(out.ut)
        out.y = fy(out.ut)
        out.z = fz(out.ut)
    else:
        out.x = np.nan
        out.y = np.nan
        out.z = np.nan

    #location
    out.Loc = GetRegion(out.Date, out.ut, out.utc, Verbose=False)

    #set default CDR quality flag
    #Normally 0 = good, 1 = bad, here -1 = not present
    out.CDRQuality[:] = -1
    out.NTPQuality[:] = -1

    #match ut with ANN output and get ANN outputs
    out.Class[:] = -1
    out.SplitClass[:, :] = -1
    out.Prob[:] = np.nan
    out.SplitProb[:, :] = np.nan
    out.nk[:] = np.nan
    out.tk[:] = np.nan
    out.pk[:] = np.nan
    out.k[:] = np.nan

    if dA is not None:
        if dA.size > 0:
            Imatch, _ = MatchUT(out.ut, dA.ut)
            ngood = np.sum(Imatch > -1)
            if ngood == dA.size:
                if Verbose:
                    print('ANN data match')
            elif ngood < dA.size:
                print('WARNING: missing {:d} ANN points'.format(dA.size -
                                                                ngood))
            else:
                print(
                    'WARNING: too many matches, something really bad has happened!'
                )
            for i in range(0, Imatch.size):
                if Imatch[i] > -1:
                    out.Class[i] = dA.Class[Imatch[i]]
                    out.SplitClass[i] = dA.SplitClass[Imatch[i]]
                    out.Prob[i] = dA.Prob[Imatch[i]]
                    out.SplitProb[i] = dA.SplitProb[Imatch[i]]
                    out.nk[i] = dA.nk[Imatch[i]]
                    out.tk[i] = dA.tk[Imatch[i]]
                    out.k[i] = dA.K[Imatch[i]]
                    out.pk[i] = out.nk[i] * 1e6 * kB * out.tk[i] * 1e6 * 1e9

    #get the appropriate flux
    Flux = dS[Species + 'Flux']

    #loop through groups
    for i in range(0, n):
        if Verbose:
            print('\rCopying data {:f}%'.format(100.0 * (i + 1) / n), end='')
        #get the METS from ESPEC first, the rest have to match this!
        useS = np.where((dS.Index >= StartInd[i])
                        & (dS.Index <= StopInd[i]))[0]
        METS = dS.MET[useS]

        out[i].StartMET = METS[0]
        out[i].StopMET = METS[-1]
        out[i].MET = METS[-1]

        #now find the other indices by using the MET list
        useE = np.where(InArray(dE.MET, METS))[0]
        useC = np.where(InArray(dC.MET, METS))[0]

        #useE = np.where((dE.MET >= StartMET[i]) & (dE.MET <= StopMET[i]))[0]

        #useC = np.where((dC.MET >= StartMET[i]) & (dC.MET <= StopMET[i]))[0]
        useN = np.where(dN.StartIndex == StartInd[i])[0]

        #get NSpec
        out[i].NSpec = useS.size

        #set E/Q and V bins
        if useE.size == 0:
            out[i].ScanType = -1
            out[i].EQBins = eqbins0
            out[i].Tau = 0.095
        else:
            out[i].ScanType = stats.mode(dE[useE].ScanType)[0][0]
            if out[i].ScanType == 0:
                out[i].EQBins = eqbins0
                out[i].VBins = vbins0 / 1000.0
                out[i].Tau = 0.095
            else:
                out[i].EQBins = eqbins2
                out[i].VBins = vbins2 / 1000.0
                out[i].Tau = 0.005

        #copy counts across,summing over spectra (proton counts only here)
        if useE.size > 0 and Species == 'H':
            out[i].Counts = np.sum(dE.ProtonRate[useE], 0)
        else:
            out[i].Counts[:] = 0

        #now to move the fluxes over from ESPEC
        if useS.size > 0:
            out[i].Flux = np.nanmean(Flux[useS], 0)

            #calculate PSD
            out[i].PSD = out[i].Flux * (mass / (out[i].VBins**2)) * (10.0 / e)

        #save the quality flags
        if useC.size > 0 and Species == 'H':
            out[i].CDRQuality[:useC.size] = dC[useC].Quality

        #input NTP values if they exist
        out.HasNTP[i] = False
        out.n[i] = np.nan
        out.t[i] = np.nan
        out.p[i] = np.nan
        if useN.size > 0 and Species == 'H':
            #currently this only exists for H
            out.n[i] = dN[useN[0]].n
            out.t[i] = dN[useN[0]].t
            out.p[i] = dN[useN[0]].p
            out.HasNTP[i] = True
            out.NTPQuality[i] = dN[useN[0]].Quality
    if Verbose:
        print()

    #This following bit will only work for protons currently, for all other ions Eff = 1
    if Species == 'H':
        #calculate efficiencies
        Tau2 = np.array([5] * 52 + [0] * 12) / 1000.0
        Tau0 = np.array([95] * 60 + [0] * 4) / 1000.0
        Eff = np.zeros((n, 64), dtype='float32')
        for i in range(0, n):
            if Verbose:
                print('\rCalculating Efficiencies {:f}%'.format(100.0 *
                                                                (i + 1) / n),
                      end='')
            if out[i].ScanType == 0:
                Ebins = eqbins0
                Tau = Tau0
            else:
                Ebins = eqbins2
                Tau = Tau2
            zero = np.where(out[i].Counts == 0)[0]
            Eff[i] = _CalculateProtonEff(Ebins, Tau, out[i].Flux,
                                         out[i].Counts)
            Eff[i][zero] = np.nan

        if np.size(Eff.shape) == 2:
            Eff = np.nanmean(Eff, 0)
            Eff[np.isfinite(Eff) == False] = np.nan
    else:
        Tau2 = np.array([5] * 52 + [0] * 12) / 1000.0
        Tau0 = np.array([95] * 60 + [0] * 4) / 1000.0
        Eff = np.zeros((n, 64), dtype='float32')
        for i in range(0, n):
            if Verbose:
                print('\rCalculating Efficiencies {:f}%'.format(100.0 *
                                                                (i + 1) / n),
                      end='')
            if out[i].ScanType == 0:
                Ebins = eqbins0
                Tau = Tau0
            else:
                Ebins = eqbins2
                Tau = Tau2
            zero = np.where(out[i].Counts == 0)[0]
            Eff[i] = Tau * 1.0
            Eff[i][zero] = np.nan
        if np.size(Eff.shape) == 2:
            Eff = np.nanmean(Eff, 0)
            Eff[np.isfinite(Eff) == False] = np.nan
    if Verbose:
        print()

    if Species == 'H':
        #attempt to refit the spectrum with a kappa distribution
        for i in range(0, n):
            if Verbose:
                print('\rRefitting Spectra {:f}%'.format(100.0 * (i + 1) / n),
                      end='')
            #save efficiency
            out[i].Efficiency[:] = Eff

            #changed this bit so that fitting only happens if n,T,K haven't already been defined in ANN data
            if np.isnan(out[i].nk):
                #set starting guess for n and T based on original fits if they exist
                if np.isnan(out[i].n):
                    n0 = 2.0e6
                    T0 = 10.0e6
                else:
                    n0 = out[i].n * 1e6
                    T0 = out[i].t * 1e6

                #now try fitting
                nTK = FitKappaDistCts(out.VBins[i] * 1000.0, out.Counts[i], n0,
                                      T0, dOmega, mass, Eff, out[i].NSpec,
                                      out[i].Tau, g)
                #check that the values are all positive at least
                if nTK[0] > 0 and nTK[1] > 0 and nTK[2] > 0:
                    out[i].nk = nTK[0] / 1e6
                    out[i].tk = nTK[1] / 1e6
                    out[i].k = nTK[2]
                    out[i].pk = nTK[0] * kB * nTK[1] * 1e9
                else:
                    out[i].nk = np.nan
                    out[i].tk = np.nan
                    out[i].k = np.nan
                    out[i].pk = np.nan

        if Verbose:
            print()
    if out.size > 0 and not DryRun:
        RT.SaveRecarray(out, fname)
    return out
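# Sketch (illustrative only) of the per-record sub-array fields used in the
# output above: each record can carry fixed-length arrays (e.g. a 64-bin
# spectrum), and whole columns can be given defaults before the copy loop.
# The field names and sizes here are assumptions.
import numpy as np

dtype = [('Date', np.int32), ('ut', np.float32),
         ('Counts', np.float32, (64,)), ('HasNTP', np.bool_)]
out = np.recarray(3, dtype=dtype)
out.Date = 20120101
out.ut = 0.0
out.Counts[:] = 0.0        # zero every 64-bin spectrum
out.HasNTP[:] = False
out.Counts[0, 10] = 5.0    # element 10 of the first record's spectrum
print(out.Counts.shape)    # -> (3, 64)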
Example #44
0
def predictIterative(printFlag):

    #TODO: remove / hardcode the following parameters
    #	remove any superfluous ...

    # Parameters

    usePos = True
    # True/False: limit to only Positive coefficient values

    # Label for pos & neg labels
    pLabel = 1
    nLabel = 0
    negMultiplier = 1

    # LASSO params
    lMaxIter = 1000
    lNorm = True
    lFitIcpt = True

    useFeatPathZScore = True
    # True/False: use the pathsim sum features
    fZScoreSim = 'features_ZScoreSim.gz'
    # File name containing path z-score vectors
    useFeatTermWeights = True
    # True/False: use the indirect term features
    useFeatNeighbor = False
    # True/False: use the neighborhood features
    useGivenRange = np.linspace(0.00001, 0.05, num=27)
    # array of vals; 'None' means to auto-search for alphas

    # Control the iterations & error
    numVotes = 11
    # how many random samples for comparison
    retrySubPortion = 0.75
    # how many of Known to keep in new sub-sample
    retryMinValid = 9
    # minimum Known genes to use for PosTrain

    if printFlag:
        print("\nPerforming regression(s) on {}".format(sDir))

    # 0) Create the useLabel variable
    # string: label for the output files
    # ie: ClusVote_c<Las/Enet/Log/SVM><P for Pos>_f<P for pathsim><Z for z-score>
    #	<T for term weights><N for neighborhood>_m<neg sample size multiplier>
    useLabel = 'Vote{}_cLas'.format(numVotes)
    if usePos:
        useLabel = useLabel + 'P'
    useLabel = useLabel + '_f'
    if useFeatPathZScore:
        useLabel = useLabel + 'Z'
    if useFeatTermWeights:
        useLabel = useLabel + 'T'
    if useFeatNeighbor:
        useLabel = useLabel + 'N'
    useLabel = useLabel + '_m{}'.format(negMultiplier)

    if printFlag:
        print("Using label: {}".format(useLabel))

    # 1) Load the gene-index dictionary & path names
    geneDict, pathDict = cl.getGeneAndPathDict(sDir)
    geneNames = list(geneDict.keys())
    geneNames.sort()
    pathNames = cl.removeInvertedPaths(pathDict)
    del pathDict

    # 2) Load the network general features
    numFN = 0
    if useFeatNeighbor:
        featNbVals, featNbNames = cl.getFeaturesNeighborhood(sDir, 'LogScale')
        featNbNames = np.ravel(featNbNames)
        numFN = len(featNbNames)
    #end if
    numTW = 0
    if useFeatTermWeights:
        featTWVals, featTWNames = cl.getFeaturesTerms(sDir, 'Orig')
        featTWNames = np.ravel(featTWNames)
        numTW = len(featTWNames)
    #end if

    # 3) Loop over the list of the sample subdirectories
    dSubDirs = cl.getSubDirectoryList(sDir)

    thisRound = 0
    for si in dSubDirs:
        thisRound += 1

        # Display directory to examine
        sv = si.split('/')
        if printFlag:
            print("\n{}/{}/".format(sv[-3], sv[-2]))

        # Create index lists for Known, Hidden, Unknown, TrueNeg from files
        giKnown, giUnknown, giHidden, giTrueNeg = cl.getGeneIndexLists(
            si, geneDict)
        giAll = list()
        giAll.extend(giKnown)
        giAll.extend(giUnknown)
        giAll.sort()

        # 4) Load the sample-specific features
        numFP = 0
        # z-score of path counts features
        if useFeatPathZScore:
            featZSVals = np.loadtxt(si + fZScoreSim)
            featZSVals = featZSVals[:, 0:len(pathNames)]
            featZSNames = pathNames
            numFP = len(featZSNames)
        #end if

        # 5) Combine the features as specified by parameters (useFeat...)
        features = np.zeros((len(geneDict), 0), dtype=np.float32)
        featNames = list()

        if useFeatPathZScore:
            if printFlag:
                print("    ... including path z-score features")
            features = np.hstack((features, featZSVals))
            featNames.extend(featZSNames)
        if useFeatNeighbor:
            if printFlag:
                print("    ... including neighborhood features")
            features = np.hstack((features, featNbVals))
            featNames.extend(np.ravel(featNbNames))
        if useFeatTermWeights:
            # Remove terms with no connection to gene set
            sumFTV = np.sum(featTWVals[giKnown, :], axis=0)
            keepIdx = np.nonzero(sumFTV)
            numTW = len(keepIdx[0])
            if printFlag:
                print("    ... including term membership features")
            features = np.hstack((features, featTWVals[:, keepIdx[0]]))
            featNames.extend(np.ravel(featTWNames[keepIdx]))
        # verify some features have been loaded
        numFeatAll = len(featNames)
        if features.shape[1] == 0:
            print("ERROR: No features were specified for classification.")
            sys.exit()
        #end if

        # Normalize the feature values
        features = cl.normalizeFeatureColumns(features)

        # Create the structure to rank the Unknown genes & paths
        geneRanks = np.zeros((len(geneDict), 1), dtype=np.int32)
        geneScores = np.zeros((len(geneDict), 1), dtype=np.float32)

        #TODO: How to save feature rankings ??

        voteScores = np.zeros((len(giAll), numVotes), dtype=np.float32)
        #			voteScores = np.zeros( (len(geneDict), numVotes), dtype=np.float32)

        if printFlag:
            print("{} votes; known: {}, total: {}, trainSet: {}".format(
                numVotes, len(giKnown), len(giAll),
                (len(giKnown) * (1 + negMultiplier))))
        #end if

        # Store how many samples use certain features
        featT1List = np.zeros((numFeatAll, 1), dtype=np.int16)
        featT5List = np.zeros((numFeatAll, 1), dtype=np.int16)
        featTAList = np.zeros((numFeatAll, 1), dtype=np.int16)
        # featT1Dict = dict()
        # featT1Set = set()
        # featT5Dict = dict()
        # featT5Set = set()
        # featTADict = dict()
        # featTASet = set()

        # 6) Prepare the test/train vectors & labels
        # Extract the vectors for the pos sets

        retrySubSample = False
        retries = 0
        vote = 0
        while vote < numVotes:

            if len(giKnown) < retryMinValid:
                retrySubSample = False

            if retrySubSample:
                retrySubSample = False

                numSubSample = int(numSubSample * retrySubPortion) + 1
                retryIterKnown = random.sample(giKnown, numSubSample)
                if len(retryIterKnown) < retryMinValid:
                    retryIterKnown = random.sample(giKnown, retryMinValid)

                posTrain = features[retryIterKnown, :]
                posTrainLabel = np.ones((len(retryIterKnown), 1)) * pLabel

                nExamples = min(negMultiplier * len(retryIterKnown),
                                (len(giAll) - len(retryIterKnown)))
            else:
                numSubSample = len(giKnown)

                posTrain = features[giKnown, :]
                posTrainLabel = np.ones((len(giKnown), 1)) * pLabel

                nExamples = min(negMultiplier * len(giKnown),
                                (len(giAll) - len(giKnown)))
            #end if

            # Extract the vectors for neg sets
            # as one-class: train with rand samp from Unknown
            #		test with all Unknown (TrueNeg + Hidden/TruePos)
            giTrainNeg = random.sample(giUnknown, nExamples)
            negTrain = features[giTrainNeg, :]
            negTrainLabel = np.ones((len(giTrainNeg), 1)) * nLabel

            # Combine to create the full train & test data sets
            # as one-class:
            trainSet = np.vstack((posTrain, negTrain))
            trainLabel = np.vstack((posTrainLabel, negTrainLabel))

            testSet = features[giAll, :]

            # Some versions want the labels reshaped
            trainLabel = np.reshape(trainLabel, [
                trainLabel.shape[0],
            ])

            # 7) Train classifier, predict on test, collect scores
            cfier = lm.LassoCV(alphas=useGivenRange,
                               positive=usePos,
                               max_iter=lMaxIter,
                               normalize=lNorm,
                               fit_intercept=lFitIcpt)
            cfier.fit(trainSet, trainLabel)
            foundAlpha = cfier.alpha_

            if printFlag:
                print(
                    "    Vote {} of {}; iters {:3d}, alpha {:.5f}, score {:.3f}; coeffs {}; sample {}"
                    .format((vote + 1), numVotes, cfier.n_iter_, foundAlpha,
                            cfier.score(trainSet, trainLabel),
                            len(np.nonzero(cfier.coef_)[0]),
                            len(posTrainLabel)))
            #end if

            cfPredLabel = cfier.predict(testSet)
            cfPredLabel = np.ravel(cfPredLabel)

            # If no coeffs (train score == 0) try again
            if len(np.nonzero(cfier.coef_)[0]) <= 0:
                if retries < (numVotes * 5):
                    retrySubSample = True
                    vote = vote - 1
                    retries += 1
                else:
                    if printFlag:
                        print("WARNING: used all retries.")

            # Else, collect info about the top-used features
            else:
                numSubSample = len(giKnown)

                # Extract indices corresponding to top 5 weighted features
                featWeights = cfier.coef_
                numFeats = len(np.nonzero(featWeights)[0])
                topFeats = np.ones((numFeats), dtype=np.int32) * (-1)
                for num in range(numFeats):
                    featIdx = np.argmax(featWeights)
                    topFeats[num] = featIdx
                    featWeights[featIdx] = 0
                #end loop

                # Increment count for the Top 1 path
                featT1List[topFeats[0]] += 1

                # Increment count for the Top 5 paths
                for num in range(5):
                    if numFeats <= num:
                        break
                    #end if
                    featT5List[topFeats[num]] += 1
                #end loop

                # Increment count for all non-zero paths
                for num in range(numFeats):
                    if numFeats <= num:
                        break
                    #end if
                    featTAList[topFeats[num]] += 1
                #end loop
            #end if

            voteScores[:, vote] = cfPredLabel

            vote += 1
        #end loop (vote)

        # 8) Place the scores into the array and store across iterations

        # first, average across the random negative samples (votes)
        #TODO: really, I should either normalize the score or vote across rank
        voteScores = cl.normalizeFeatureColumns(voteScores)
        voteAvgScore = np.mean(voteScores, axis=1)
        voteUnknownScore = voteAvgScore[giUnknown]

        ranker = np.recarray(len(giUnknown),
                             dtype=[('inverse', 'f4'), ('score', 'f4'),
                                    ('geneIdx', 'i4')])

        ranker['score'] = voteUnknownScore
        ranker['inverse'] = np.multiply(voteUnknownScore, -1)
        ranker['geneIdx'] = giUnknown

        ranker.sort(order=['inverse', 'geneIdx'])

        # 11) Output the ranked genes to file

        # write the file
        fname = 'ranked_genes-' + useLabel + '_Avg.txt'
        if printFlag:
            print("  Saving ranked genes to file {}".format(fname))
        with open(si + fname, 'w') as fout:
            firstRow = True
            for row in range(len(ranker)):
                if not firstRow:
                    fout.write('\n')
                fout.write('{:3.3f}{}{}'.format(
                    ranker['score'][row], '\t',
                    geneNames[ranker['geneIdx'][row]]))
                firstRow = False
        #end with

        # 12) Output the selected feature info to file
#TODO: this

        # Sort the Top 1 paths
        featT1Sort = np.recarray(numFeatAll,
                                 dtype=[('featIdx', 'i4'), ('count', 'i4')])
        for row in range(numFeatAll):
            featT1Sort['featIdx'][row] = row
            featT1Sort['count'][row] = featT1List[row]
        #end loop
        featT1Sort[::-1].sort(order=['count', 'featIdx'])

        # Save the Top 1 paths to file
        fname = 'ranked_features_Top1-' + useLabel + '.txt'
        with open(si + fname, 'w') as fout:
            fout.write('Votes:{}{}'.format('\t', numVotes))
            row = 0
            while row < numFeatAll and featT1Sort['count'][row] != 0:
                fout.write('\n{}{}{}'.format(
                    featT1Sort['count'][row], '\t',
                    featNames[featT1Sort['featIdx'][row]]))
                row += 1
        #end with

        # Sort the Top 5 paths
        featT5Sort = np.recarray(numFeatAll,
                                 dtype=[('featIdx', 'i4'), ('count', 'i4')])
        for row in range(numFeatAll):
            featT5Sort['featIdx'][row] = row
            featT5Sort['count'][row] = featT5List[row]
        #end loop
        featT5Sort[::-1].sort(order=['count', 'featIdx'])

        # Save the Top 5 paths to file
        fname = 'ranked_features_Top5-' + useLabel + '.txt'
        with open(si + fname, 'w') as fout:
            fout.write('Votes:{}{}'.format('\t', numVotes))
            row = 0
            while row < numFeatAll and featT5Sort['count'][row] != 0:
                fout.write('\n{}{}{}'.format(
                    featT5Sort['count'][row], '\t',
                    featNames[featT5Sort['featIdx'][row]]))
                row += 1
        #end with

        # Sort the Top All Non-Zero paths
        featTASort = np.recarray(numFeatAll,
                                 dtype=[('featIdx', 'i4'), ('count', 'i4')])
        for row in range(numFeatAll):
            featTASort['featIdx'][row] = row
            featTASort['count'][row] = featTAList[row]
        #end loop
        featTASort[::-1].sort(order=['count', 'featIdx'])

        # Save the Top All Non-Zero paths to file
        fname = 'ranked_features_TopNZ-' + useLabel + '.txt'
        with open(si + fname, 'w') as fout:
            fout.write('Votes:{}{}'.format('\t', numVotes))
            row = 0
            while row < numFeatAll and featTASort['count'][row] != 0:
                fout.write('\n{}{}{}'.format(
                    featTASort['count'][row], '\t',
                    featNames[featTASort['featIdx'][row]]))
                row += 1
        #end with

        # 13) Output the parameters to file
        fname = 'parameters-' + useLabel + '.txt'
        with open(si + fname, 'w') as fout:
            fout.write('\n')
            fout.write('Sampling Method for Neg examples\n')
            fout.write(
                '  as One-Class w/ iterations on the weaker predictions\n')
            fout.write('\n')
            fout.write('Features Used\n')
            fout.write('path Z-Score:{}{}\n'.format('\t', useFeatPathZScore))
            fout.write('Neighborhood:{}{}\n'.format('\t', useFeatNeighbor))
            fout.write('Term Weights:{}{}\n'.format('\t', useFeatTermWeights))
            fout.write('\n')

            #TODO: collect some stats (ie: common alphas, l1 ratios, etc)
            fout.write('Classifier Parameters\n')
            fout.write('method:{}Lasso\n'.format('\t'))
            fout.write('positive:{}{}\n'.format('\t', usePos))
            fout.write('alpha range:{}{}\n'.format('\t', useGivenRange))
            fout.write('alpha chosen:{}{}\n'.format('\t', cfier.alpha_))
            fout.write('max_iter:{}{}\n'.format('\t', lMaxIter))
            fout.write('normalize:{}{}\n'.format('\t', lNorm))
            fout.write('fit_intercept:{}{}\n'.format('\t', lFitIcpt))
            fout.write('\n')
        #end with

        if printFlag:
            print("--{} of {}".format(thisRound, len(dSubDirs)))
def calc_eccentricity(args, options):
    table = os.path.join(args[0], 'table2.dat')
    readme = os.path.join(args[0], 'ReadMe')
    dierickx = ascii.read(table, readme=readme)
    vxvv = np.dstack([
        dierickx['RAdeg'], dierickx['DEdeg'], dierickx['Dist'] / 1e3,
        dierickx['pmRA'], dierickx['pmDE'], dierickx['HRV']
    ])[0]
    ro, vo, zo = 8., 220., 0.025
    ra, dec = vxvv[:, 0], vxvv[:, 1]
    lb = bovy_coords.radec_to_lb(ra, dec, degree=True)
    pmra, pmdec = vxvv[:, 3], vxvv[:, 4]
    pmllpmbb = bovy_coords.pmrapmdec_to_pmllpmbb(pmra,
                                                 pmdec,
                                                 ra,
                                                 dec,
                                                 degree=True)
    d, vlos = vxvv[:, 2], vxvv[:, 5]
    rectgal = bovy_coords.sphergal_to_rectgal(lb[:, 0],
                                              lb[:, 1],
                                              d,
                                              vlos,
                                              pmllpmbb[:, 0],
                                              pmllpmbb[:, 1],
                                              degree=True)
    vsolar = np.array([-10.1, 4.0, 6.7])
    vsun = np.array([
        0.,
        1.,
        0.,
    ]) + vsolar / vo
    X = rectgal[:, 0] / ro
    Y = rectgal[:, 1] / ro
    Z = rectgal[:, 2] / ro
    vx = rectgal[:, 3] / vo
    vy = rectgal[:, 4] / vo
    vz = rectgal[:, 5] / vo
    Rphiz = bovy_coords.XYZ_to_galcencyl(X, Y, Z, Zsun=zo / ro)
    vRvTvz = bovy_coords.vxvyvz_to_galcencyl(vx,
                                             vy,
                                             vz,
                                             Rphiz[:, 0],
                                             Rphiz[:, 1],
                                             Rphiz[:, 2],
                                             vsun=vsun,
                                             Xsun=1.,
                                             Zsun=zo / ro,
                                             galcen=True)
    #do the integration and individual analytic estimate for each object
    ts = np.linspace(0., 20., 10000)
    lp = LogarithmicHaloPotential(normalize=1.)
    e_ana = np.zeros(len(vxvv))
    e_int = np.zeros(len(vxvv))
    print(
        'Performing orbit integration and analytic parameter estimates for Dierickx et al. sample...'
    )
    for i in tqdm(range(len(vxvv))):
        orbit = Orbit(vxvv[i], radec=True, vo=220., ro=8.)
        try:
            e_ana[i] = orbit.e(analytic=True, pot=lp, c=True)
        except UnboundError:
            e_ana[i] = np.nan
        orbit.integrate(ts, lp)
        e_int[i] = orbit.e(analytic=False)
    fig = plt.figure()
    fig.set_size_inches(1.5 * columnwidth, 1.5 * columnwidth)
    plt.scatter(e_int, e_ana, s=1, color='Black', lw=0.)
    plt.xlabel(r'$\mathrm{galpy\ integrated}\ e$')
    plt.ylabel(r'$\mathrm{galpy\ analytic}\ e$')
    plt.xlim(0., 1.)
    plt.ylim(0., 1.)
    fig.tight_layout()
    plt.savefig(os.path.join(args[0], 'dierickx-integratedeanalytice.png'),
                format='png',
                dpi=200)
    fig = plt.figure()
    fig.set_size_inches(1.5 * columnwidth, 1.5 * columnwidth)
    plt.hist(e_int, bins=30)
    plt.xlim(0., 1.)
    plt.xlabel(r'$\mathrm{galpy}\ e$')
    fig.tight_layout()
    plt.savefig(os.path.join(args[0], 'dierickx-integratedehist.png'),
                format='png',
                dpi=200)
    fig = plt.figure()
    fig.set_size_inches(1.5 * columnwidth, 1.5 * columnwidth)
    plt.scatter(dierickx['e'], e_int, s=1, color='Black', lw=0.)
    plt.xlabel(r'$\mathrm{Dierickx\ et\ al.}\ e$')
    plt.ylabel(r'$\mathrm{galpy\ integrated}\ e$')
    plt.xlim(0., 1.)
    plt.ylim(0., 1.)
    fig.tight_layout()
    plt.savefig(os.path.join(args[0], 'dierickx-integratedee.png'),
                format='png',
                dpi=200)
    fig = plt.figure()
    fig.set_size_inches(1.5 * columnwidth, 1.5 * columnwidth)
    plt.scatter(dierickx['e'], e_ana, s=1, color='Black', lw=0.)
    plt.xlabel(r'$\mathrm{Dierickx\ et\ al.}\ e$')
    plt.ylabel(r'$\mathrm{galpy\ estimated}\ e$')
    plt.xlim(0., 1.)
    plt.ylim(0., 1.)
    fig.tight_layout()
    plt.savefig(os.path.join(args[0], 'dierickx-analyticee.png'),
                format='png',
                dpi=200)
    arr = np.recarray(len(e_ana),
                      dtype=[('analytic_e', float),
                             ('integrated_e', float)])
    arr['analytic_e'] = e_ana
    arr['integrated_e'] = e_int
    with open(os.path.join(args[0], 'eccentricities.dat'), 'wb') as f:
        pickle.dump(arr, f)
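
The function above pickles the eccentricity recarray to disk; pickle needs a binary file handle. A small round-trip sketch, assuming only numpy and the standard library (the file name is arbitrary):

import pickle
import numpy as np

arr = np.recarray(3, dtype=[('analytic_e', float), ('integrated_e', float)])
arr['analytic_e'] = [0.10, 0.25, 0.40]
arr['integrated_e'] = [0.12, 0.24, 0.41]

# write in binary mode ...
with open('eccentricities.dat', 'wb') as f:
    pickle.dump(arr, f)

# ... and read back
with open('eccentricities.dat', 'rb') as f:
    loaded = pickle.load(f)

assert np.allclose(loaded['analytic_e'], arr['analytic_e'])
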
Example #46
0
def _Combine10sDateSpecies(Date, Species='H', Verbose=True, Overwrite=False):
    '''
    Combines the relevant files for a given species on a given date.

    Inputs
    =======
    Date : integer, format: yyyymmdd
    Species : string, one of 'H', 'He', 'He2', 'O', 'Na'
    Verbose : bool, print progress messages
    Overwrite : bool, overwrite an existing combined file

    '''

    #use species to calculate some constants
    mass = Globals.Constants.amu * Globals.IonMass.get(Species,
                                                       Globals.IonMass['H'])
    e = Globals.Constants.e
    g = Globals.Constants.g
    kB = Globals.Constants.kB
    dOmega = Globals.Constants.dOmega
    eqbins0 = Globals.EQBins[0]
    eqbins2 = Globals.EQBins[2]
    if Species == 'He2':
        vbins0 = np.sqrt((2 * e * 2000.0 * eqbins0) / mass)
        vbins2 = np.sqrt((2 * e * 2000.0 * eqbins2) / mass)
    else:
        vbins0 = np.sqrt((e * 2000.0 * eqbins0) / mass)
        vbins2 = np.sqrt((e * 2000.0 * eqbins2) / mass)

    #get output dtype, file name and path
    OutPath = Globals.MessPath + 'FIPS/Combined/10s/{:s}/'.format(Species)
    if not os.path.isdir(OutPath):
        os.system('mkdir -pv ' + OutPath)
    dtype = Globals.dtype10s
    fname = OutPath + '{:08d}.bin'.format(Date)

    if os.path.isfile(fname) and not Overwrite:
        print("File {:s} exists".format(fname))
        return

    #read in the four data files (if they exist)
    dS = ReadData(Date, 'espec')
    dN = ReadData(Date, 'ntp')
    dE = ReadData(Date, 'edr')
    dC = ReadData(Date, 'cdr')

    #check that there are any data points:
    if dE.size == 0 and dC.size == 0 and dS.size == 0 and dN.size == 0:
        return  #no data found at all for this date

    #find number of record using either EDR/CDR (for H) or ESPEC (for everything else)
    if Species == 'H':
        n = dC.size
        MET = dC.MET
        Index = np.arange(dC.size)
    else:
        n = dS.size
        MET = dS.MET
        Index = dS.Index

    #now time to create the output array
    out = np.recarray(n, dtype=dtype)

    #save some ion info
    spstr = Species + (3 - (len(Species))) * ' '
    out.Ion = spstr
    out.Mass = mass

    #save ut and MET
    met0 = dC.MET[0] - dC.ut[0] * 3600.0  #MET at the start of the day
    out.Date = Date
    out.Index = Index
    out.MET = MET
    out.ut = (out.MET - met0) / 3600.0

    #continuous ut
    out.utc = ContUT(out.Date, out.ut)

    #position
    pos = GetPosition(Date)
    if pos.size > 0:
        fx = interp1d(pos.ut,
                      pos.x,
                      kind='cubic',
                      bounds_error=False,
                      fill_value='extrapolate')
        fy = interp1d(pos.ut,
                      pos.y,
                      kind='cubic',
                      bounds_error=False,
                      fill_value='extrapolate')
        fz = interp1d(pos.ut,
                      pos.z,
                      kind='cubic',
                      bounds_error=False,
                      fill_value='extrapolate')
        out.x = fx(out.ut)
        out.y = fy(out.ut)
        out.z = fz(out.ut)
    else:
        out.x = np.nan
        out.y = np.nan
        out.z = np.nan

    #location
    out.Loc = GetRegion(out.Date, out.ut, out.utc, Verbose=False)

    #set default CDR quality flag
    #Normally 0 = good, 1 = bad, here -1 = not present
    out.CDRQuality[:] = -1
    out.NTPQuality[:] = -1

    #get the appropriate flux
    Flux = dS[Species + 'Flux']

    #loop through groups
    for i in range(0, n):
        if Verbose:
            print('\rCopying data {:f}%'.format(100.0 * (i + 1) / n), end='')
        #get the METS from ESPEC first, the rest have to match this!
        if Species == 'H':
            useS = np.where(dS.Index == Index[i])[0]
            useE = np.array([i])
            useC = np.array([i])
        else:
            useS = np.array([i])
            useC = np.where(dC.Index == Index[i])[0]
            useE = useC

        #useC = np.where((dC.MET >= StartMET[i]) & (dC.MET <= StopMET[i]))[0]
        useN = np.where((dN.StartIndex <= Index[i])
                        & (dN.StopIndex >= Index[i]))[0]

        #set E/Q and V bins
        if useE.size == 0:
            out[i].ScanType = -1
            out[i].EQBins = eqbins0
            out[i].Tau = 0.095
        else:
            out[i].ScanType = stats.mode(dE[useE].ScanType)[0][0]
            if out[i].ScanType == 0:
                out[i].EQBins = eqbins0
                out[i].VBins = vbins0 / 1000.0
                out[i].Tau = 0.095
            else:
                out[i].EQBins = eqbins2
                out[i].VBins = vbins2 / 1000.0
                out[i].Tau = 0.005

        #copy counts across,summing over spectra (proton counts only here)
        if useE.size > 0 and Species == 'H':
            out[i].Counts = np.sum(dE.ProtonRate[useE], 0)
        else:
            out[i].Counts[:] = 0

        #now to move the fluxes over from ESPEC
        if useS.size > 0:
            out[i].Flux = Flux[useS[0]]

            #calculate PSD
            out[i].PSD = out[i].Flux * (mass / (out[i].VBins**2)) * (10.0 / e)

        #save the quality flags
        if useC.size > 0:
            out[i].CDRQuality = dC[useC[0]].Quality

        #input NTP values if they exist
        out.HasNTP[i] = False
        out.n[i] = np.nan
        out.t[i] = np.nan
        out.p[i] = np.nan
        if useN.size > 0 and Species == 'H':
            #currently this only exists for H
            out.n[i] = dN[useN[0]].n
            out.t[i] = dN[useN[0]].t
            out.p[i] = dN[useN[0]].p
            out.HasNTP[i] = True
            out.NTPQuality[i] = dN[useN[0]].Quality
    if Verbose:
        print()

    #This following bit will only work for protons currently, for all other ions Eff = 1
    if Species == 'H':
        #calculate efficiencies
        Tau2 = np.array([5] * 52 + [0] * 12) / 1000.0
        Tau0 = np.array([95] * 60 + [0] * 4) / 1000.0
        Eff = np.zeros((n, 64), dtype='float32')
        for i in range(0, n):
            if Verbose:
                print('\rCalculating Efficiencies {:f}%'.format(100.0 *
                                                                (i + 1) / n),
                      end='')
            if out[i].ScanType == 0:
                Ebins = eqbins0
                Tau = Tau0
            else:
                Ebins = eqbins2
                Tau = Tau2
            zero = np.where(out[i].Counts == 0)[0]

            Eff[i] = _CalculateProtonEff(Ebins, Tau, out[i].Flux,
                                         out[i].Counts)
            nf = np.where(np.isfinite(Eff[i]) == False)[0]
            Eff[i][nf] = np.nan
            Eff[i][zero] = np.nan

        if np.size(Eff.shape) == 2:
            Eff = np.nanmean(Eff, 0)
            Eff[np.isfinite(Eff) == False] = np.nan
    else:
        Tau2 = np.array([5] * 52 + [0] * 12) / 1000.0
        Tau0 = np.array([95] * 60 + [0] * 4) / 1000.0
        Eff = np.zeros((n, 64), dtype='float32')
        for i in range(0, n):
            if Verbose:
                print('\rCalculating Efficiencies {:f}%'.format(100.0 *
                                                                (i + 1) / n),
                      end='')
            if out[i].ScanType == 0:
                Ebins = eqbins0
                Tau = Tau0
            else:
                Ebins = eqbins2
                Tau = Tau2
            zero = np.where(out[i].Counts == 0)[0]
            Eff[i] = Tau * 1.0
            nf = np.where(np.isfinite(Eff[i]) == False)[0]
            Eff[i][zero] = np.nan
            Eff[i][nf] = np.nan
        if np.size(Eff.shape) == 2:
            Eff = np.nanmean(Eff, 0)
            Eff[np.isfinite(Eff) == False] = np.nan
    if Verbose:
        print()

    if Species == 'H':
        #attempt to refit the spectrum with a kappa distribution
        for i in range(0, n):
            if Verbose:
                print('\rRefitting Spectra {:f}%'.format(100.0 * (i + 1) / n),
                      end='')
            #save efficiency
            out[i].Efficiency[:] = Eff

            #set starting guess for n and T based on original fits if they exist
            if np.isnan(out[i].n):
                n0 = 2.0e6
                T0 = 10.0e6
            else:
                n0 = out[i].n * 1e6
                T0 = out[i].t * 1e6

            #now try fitting
            nTK = FitKappaDistCts(out.VBins[i] * 1000.0, out.Counts[i], n0, T0,
                                  dOmega, mass, Eff, 1, out[i].Tau, g)
            #check that the values are all positive at least
            if nTK[0] > 0 and nTK[1] > 0 and nTK[2] > 0:
                out[i].nk = nTK[0] / 1e6
                out[i].tk = nTK[1] / 1e6
                out[i].k = nTK[2]
                out[i].pk = nTK[0] * kB * nTK[1] * 1e9
            else:
                out[i].nk = np.nan
                out[i].tk = np.nan
                out[i].k = np.nan
                out[i].pk = np.nan

        if Verbose:
            print()
    if out.size > 0:
        RT.SaveRecarray(out, fname)
    return out
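
Much of the code above relies on recarray attribute access: whole columns are filled via out.Field, single rows via out[i].Field. A toy sketch of both patterns with a scalar field and a fixed-length array field (the field names here are invented):

import numpy as np

dtype = [('MET', 'f8'), ('Counts', 'i4', (64,))]
out = np.recarray(5, dtype=dtype)

out.MET = np.arange(5, dtype='f8')   # attribute access fills the whole column
out[0].Counts = 0                    # row-wise assignment broadcasts over the (64,) field
out[1].Counts = np.arange(64)        # or assign a full per-row array

print(out.MET, out[1].Counts[:5])
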
Example #47
0
    def test_record_scalar_setitem(self):
        # https://github.com/numpy/numpy/issues/3561
        rec = np.recarray(1, dtype=[('x', float, 5)])
        rec[0].x = 1
        assert_equal(rec[0].x, np.ones(5))
Example #48
0
File: RNTN.py Project: yuan776/RNTN
def get_tree(proto_tree, N, vocabulary, L, params):
    '''
    '''

    dtype = [('node_idx', int),
             ('pair', int),
             ('side', '|S1'),
             ('parent', int),
             ('child_left', int),
             ('child_right', int),
             ('depth', int),
             ('leaf', bool),
             ('t', object),
             ('t_gpu', object),
             ('y_gpu', object),
             ('x_gpu', object),
             ('d_gpu', object),
             ('ds_gpu', object),
             ('d2s_gpu', object),
             ('word_idx', int),
             ('word', 'U27')]

    tree = np.recarray(N, dtype=dtype)
    tree[:] = -1
    tree['pair'] = -1
    
    children_stack = []
    node_idx = 0
    side = ''
    depth = 1
    
    #-------------- Add data to tree -----------------#
    def add_data(wrd, leaf, child_l=None, child_r=None):
    
        tree[node_idx]['node_idx'] = node_idx
        tree[node_idx]['side'] = side
        tree[node_idx]['t'] = np.zeros(params['C'], dtype=np.float64)
        tree[node_idx]['t'][wrd[0]] = 1.0
        tree[node_idx]['depth'] = depth
        tree[node_idx]['leaf'] = leaf
        if leaf:
            tree[node_idx]['word'] = wrd[1]
            tree[node_idx]['word_idx'] = vocabulary.index(wrd[1])
            x_gpu = gpuarray.to_gpu(L[tree[node_idx]['word_idx'], :])
            tree[node_idx]['x_gpu'] = x_gpu
        else:
            tree[node_idx]['word'] = ''
            tree[node_idx].child_right = child_r
            tree[node_idx].child_left = child_l
            # Add parent
            tree[child_r]['parent'] = tree[node_idx]['node_idx']
            tree[child_l]['parent'] = tree[node_idx]['node_idx']            
            tree[node_idx]['x_gpu'] = gpuarray.empty(params['w_d'], np.float64)
        
        tree[node_idx]['d_gpu'] = gpuarray.empty(params['w_d'], np.float64)
        tree[node_idx]['ds_gpu'] = gpuarray.empty(params['w_d'], np.float64)
        tree[node_idx]['d2s_gpu'] = gpuarray.empty(params['C'], np.float64)
        tree[node_idx]['y_gpu'] = gpuarray.empty(params['C'], np.float64)
        tree[node_idx]['t_gpu'] = gpuarray.to_gpu(tree[node_idx]['t'])
        # All pairs at current depth
        pairs = tree['pair'][tree['depth'] == depth]
        # Max is last pair
        if pairs.max() == -1:  # No pairs at this depth
            tree[node_idx]['pair'] = 1  # Start new pair
        elif (pairs > -1).sum() % 2: # Odd number -> complete the last pair               
            tree[node_idx]['pair'] = pairs.max()
        else:  # Left half of new pair
            tree[node_idx]['pair'] = pairs.max() + 1 
    #-----------------------------------------------#

    pos = 'proto_tree'
    while len(proto_tree) > 1:
        if len(eval(pos)) == 3: # Both branches
            pos += '[1]' # go down in left
            depth += 1
            side = 'l' # left side

        elif len(eval(pos)) == 2: # Only right branch or leaf

            if type(eval(pos + '[1]')) is str:  # leaf
                # Cut
                pos = pos[:-3]
                wrd = eval(pos).pop(1)
                add_data(wrd, True)
                # Put node_idx in the children_stack, but don't pop since its
                # a leaf node, ie no children
                children_stack.append(node_idx)
                node_idx += 1
                # Climb up
                depth -= 1

            else:  # right branch
                pos += '[1]' # go down in right
                depth += 1
                side = 'r' # right side

        elif len(eval(pos)) == 1: # Branch node, w both children cut
            # Cut
            pos = pos[:-3]
            wrd = eval(pos).pop(1)
            # left OR right side in a pair?
            if len(eval(pos)) == 2:         # if parent len is 2 -> left
                side = 'l' # left side
            elif len(eval(pos)) == 1:       # if parent len is 1 -> right
                side = 'r' # right side

            # Pop children
            child_r = children_stack.pop()
            child_l = children_stack.pop()
            add_data(wrd, False, child_l=child_l, child_r=child_r)
            # Put node_idx in the children_stack
            children_stack.append(node_idx)
            node_idx += 1
            # Climb up
            depth -= 1

    side = ''
    child_r = children_stack.pop()
    child_l = children_stack.pop()
    add_data(proto_tree, False, child_l=child_l, child_r=child_r)
    tree[node_idx]['parent'] = -1  # fix -1/last element mix up.
    
        
    return tree
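
The tree above keeps GPU buffers in object-dtype fields, one per node. A stripped-down sketch of that pattern using plain ndarrays in place of the gpuarray objects (fields shortened, values made up):

import numpy as np

dtype = [('node_idx', int), ('leaf', bool), ('t', object), ('x', object)]
tree = np.recarray(3, dtype=dtype)

tree['node_idx'] = np.arange(3)
tree['leaf'] = [False, True, True]
for i in range(3):
    tree['t'][i] = np.zeros(5)           # e.g. a one-hot target vector
    tree['x'][i] = np.random.rand(4)     # e.g. a word embedding / activation

print(tree['node_idx'], tree['x'][2].shape)
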
Example #49
0
    if chandra_type[i] == 'GALAXY':
        rad = float(0.9 * 16.6 + (1.08 * z))
        chandra_r.append(rad)
        chandra_t.append('!GALAXY')
    else:
        rad = float(0.5 * 16.6 + (1.08 * z))
        chandra_r.append(rad)
        chandra_t.append('!AGN')

for i in range(0, num):
    radius = chandra_r[i] / 0.05
    r[i] = radius

data2 = np.recarray((num, ),
                    dtype=(numpy.record, [('TYPE', 'S16'), ('X', '>f4', (4, )),
                                          ('Y', '>f4', (4, )),
                                          ('R', '>f4', (4, )),
                                          ('FLUX', '>f4', (1, )),
                                          ('ROTANG', '>f4', (4, ))]))
for i in range(0, num):
    data2[i][0] = chandra_t[i]
    data2[i][1] = (chandra_ra[i], 0, 0, 0)
    data2[i][2] = (chandra_dec[i], 0, 0, 0)
    data2[i][3] = (r[i], 0, 0, 0)
    data2[i][4] = chandra_flux[i]
    data2[i][5] = (0., 0, 0, 0)

file = "pnS005-bkg_region-radec.fits"
hdu = pyfits.open(file)
data = hdu[1].data

nhdu1 = pyfits.PrimaryHDU()
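
Records in the snippet above are filled positionally (data2[i][0], data2[i][1], ...), one slot per field of the record dtype. A tiny sketch of the same pattern with invented values:

import numpy as np

dtype = (np.record, [('TYPE', 'S16'), ('X', '>f4', (4,)),
                     ('Y', '>f4', (4,)), ('R', '>f4', (4,))])
regions = np.recarray((2,), dtype=dtype)

regions[0][0] = b'!AGN'                # TYPE
regions[0][1] = (150.1, 0, 0, 0)       # X: RA plus padding
regions[0][2] = (2.2, 0, 0, 0)         # Y: Dec plus padding
regions[0][3] = (12.0, 0, 0, 0)        # R: radius in pixels

print(regions[0]['TYPE'], regions[0]['X'])
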
Example #50
0
class DataChopper(PropertiedObject, BaseFilter):
    """
    EventDataChopper converts continuous time series of entire session into chunks based on the events specification
    In other words you may read entire eeg session first and then using EventDataChopper
    divide it into chunks corresponding to events of your choice
    """
    _descriptors = [
        TypeValTuple('start_time', float, 0.0),
        TypeValTuple('end_time', float, 0.0),
        TypeValTuple('buffer_time', float, 0.0),
        TypeValTuple('events', np.recarray,
                     np.recarray((1, ), dtype=[('x', int)])),
        TypeValTuple('start_offsets', np.ndarray, np.array([], dtype=int)),
        TypeValTuple('session_data', TimeSeriesX,
                     TimeSeriesX([0.0], dict(samplerate=1.), dims=['time'])),
    ]

    def __init__(self, **kwds):
        """
        Constructor:

        :param kwds:allowed values are:
        -------------------------------------
        :param start_time {float} -  read start offset in seconds w.r.t to the eegeffset specified in the events recarray
        :param end_time {float} -  read end offset in seconds w.r.t to the eegeffset specified in the events recarray
        :param end_time {float} -  extra buffer in seconds (subtracted from start read and added to end read)
        :param events {np.recarray} - numpy recarray representing events
        :param startoffsets {np.ndarray} - numpy array with offsets at which chopping should take place
        :param session_datar {str} -  TimeSeriesX object with eeg session data

        :return: None
        """

        self.init_attrs(kwds)

    def get_event_chunk_size_and_start_point_shift(self, eegoffset, samplerate,
                                                   offset_time_array):
        """
        Computes number of time points for each event and read offset w.r.t. event's eegoffset
        :param ev: record representing single event
        :param samplerate: samplerate fo the time series
        :param offset_time_array: "offsets" axis of the DataArray returned by EEGReader. This is the axis that represents
        time axis but instead of beind dimensioned to seconds it simply represents position of a given data point in a series
        The time axis is constructed by dividint offsets axis by the samplerate
        :return: event's read chunk size {int}, read offset w.r.t. to event's eegoffset {}
        """
        # figuring out read size chunk and shift w.r.t to eegoffset. We need this fcn in case we pass resampled session data

        original_samplerate = float(
            (offset_time_array[-1] -
             offset_time_array[0])) / offset_time_array.shape[0] * samplerate

        start_point = eegoffset - int(
            np.ceil(
                (self.buffer_time - self.start_time) * original_samplerate))
        end_point = eegoffset + int(
            np.ceil((self.end_time + self.buffer_time) * original_samplerate))

        selector_array = np.where((offset_time_array >= start_point)
                                  & (offset_time_array < end_point))[0]
        start_point_shift = selector_array[0] - np.where(
            (offset_time_array >= eegoffset))[0][0]

        return len(selector_array), start_point_shift

    def filter(self):
        """
        Chops session into chunks corresponding to events
        :return: timeSeriesX object with chopped session
        """
        chop_on_start_offsets_flag = bool(len(self.start_offsets))

        if chop_on_start_offsets_flag:

            start_offsets = self.start_offsets
            chopping_axis_name = 'start_offsets'
            chopping_axis_data = start_offsets
        else:

            evs = self.events[self.events.eegfile ==
                              self.session_data.attrs['dataroot']]
            start_offsets = evs.eegoffset
            chopping_axis_name = 'events'
            chopping_axis_data = evs

        # samplerate = self.session_data.attrs['samplerate']
        samplerate = float(self.session_data['samplerate'])
        offset_time_array = self.session_data['offsets']

        event_chunk_size, start_point_shift = self.get_event_chunk_size_and_start_point_shift(
            eegoffset=start_offsets[0],
            samplerate=samplerate,
            offset_time_array=offset_time_array)

        event_time_axis = np.arange(event_chunk_size) * (1.0 / samplerate) + (
            self.start_time - self.buffer_time)

        data_list = []

        for i, eegoffset in enumerate(start_offsets):

            start_chop_pos = np.where(offset_time_array >= eegoffset)[0][0]
            start_chop_pos += start_point_shift
            selector_array = np.arange(start=start_chop_pos,
                                       stop=start_chop_pos + event_chunk_size)

            chopped_data_array = self.session_data.isel(time=selector_array)

            chopped_data_array['time'] = event_time_axis
            chopped_data_array['start_offsets'] = [i]

            data_list.append(chopped_data_array)

        ev_concat_data = xr.concat(data_list, dim='start_offsets')

        ev_concat_data = ev_concat_data.rename(
            {'start_offsets': chopping_axis_name})
        ev_concat_data[chopping_axis_name] = chopping_axis_data

        attrs = {
            "start_time": self.start_time,
            "end_time": self.end_time,
            "buffer_time": self.buffer_time
        }
        ev_concat_data['samplerate'] = samplerate
        return TimeSeriesX.create(ev_concat_data, samplerate, attrs=attrs)
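
Underneath the class machinery, the chopping itself amounts to slicing a fixed-size window around each event offset. A rough, library-free sketch of that idea (all numbers hypothetical):

import numpy as np

samplerate = 100.0                                             # Hz
session = np.sin(np.arange(0, 10 * samplerate) / samplerate)   # 10 s of fake signal
offsets = np.array([120, 340, 760])                            # event positions, in samples

start_time, end_time, buffer_time = -0.1, 0.5, 0.2             # seconds relative to each event
pre = int(np.ceil((buffer_time - start_time) * samplerate))    # samples before the event
post = int(np.ceil((end_time + buffer_time) * samplerate))     # samples after the event

chunks = np.stack([session[off - pre: off + post] for off in offsets])
print(chunks.shape)                                            # (n_events, pre + post)
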
Example #51
0
def extract_data(pattern, query, headers, ctypes=None, fname=''):
    """
    Extract data from CSV files whose name matches pattern.
    Every record in a given file is checked if it satisfies the query
    condition(s). If the query condition(s) are satisfied, data from the
    columns specified by headers are extracted from that record. Collected
    records are returned in a numpy record array and, if a filename fname is
    specified, they are also written to fname in tab-separated CSV format.
    If no matching records are found an empty record array of type bool is
    returned.
    argument:       comment:
    pattern         a file name pattern for files from which records are to be
                    extracted
    query           conditions in the form of a dictionary (list of key-value
                    pairs) that need to be fulfilled for a record to be
                    extracted;
                    the query dictionary is specified in one of these forms:
                      dict(k1=v1, k2=v2, k3=v3, ...)
                      dict({"k1":v1, "k2":v2, "k3":v3, ...})
                      {"k1":v1, "k2":v2, "k3":v3, ...}
    headers         a list of strings specifying the column headers for the
                    columns which are to be extracted
    ctypes          if not None, is a dictionary mapping column number or
                    munged column name to a converter function;
                    the column type converter dictionary can be specified as:
                      {"k1":t1, "k2":t2, "k3":t3, ...}
                    where the t can be, e.g., str, int, float, bool.
    fname           if defined, the name of the CSV file (tab-separated) to
                    which extracted records are written
    """

    # get all file names that match pattern
    infiles = glob.glob(pattern)
    infiles.sort()

    # determine the query and header keys (in lowercase because csv2rec
    # lowercases headers), and query values
    qkeys = list(query.keys())
    qlckeys = [x.lower() for x in qkeys]
    qvalues = list(query.values())
    hlckeys = [x.lower() for x in headers]
    if ctypes:
        ctypes_lc = dict(
            (key.lower(), value) for (key, value) in ctypes.items())
    else:
        ctypes_lc = None
    mkeys = set(qlckeys)
    mkeys = mkeys.union(hlckeys)
    mrows = []

    # check files for query patterns
    for f in infiles:
        d = mlab.csv2rec(f, delimiter='\t', converterd=ctypes_lc)
        # check if the data contain the necessary columns
        if mkeys <= set(d.dtype.names):
            # find the records that match the query
            darray = mlab.rec_keep_fields(d, qlckeys)
            imatch = np.array([False] * darray.size)
            for i in range(darray.size):
                if list(darray[i]) == qvalues:
                    imatch[i] = True
            # get data from records that matched the query
            if any(imatch):
                marray = mlab.rec_keep_fields(d, hlckeys)[imatch]
                for row in marray:
                    mrows.append(row.tolist())

    # write data from matching records to file if requested and return results
    if mrows:
        # The following does not work because the mlab.csv2rec() converterd
        # data type specifications are different from the
        # np.core.records.fromrecords() dtype data type specifications ...
        #results = np.core.records.fromrecords(mrows, dtype=ctypes_lc)
        # ... so, for now we cross our fingers and hope that
        # np.core.records.fromrecords() intuits the data types correctly, which
        # it seems to do (most of the time)
        results = np.core.records.fromrecords(mrows, names=headers)
    else:
        dt = [(h, bool) for h in headers]
        results = np.recarray(0, dtype=dt)
    if fname != '':
        mlab.rec2csv(results, fname, delimiter='\t')
    return results
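
matplotlib.mlab's csv2rec, rec_keep_fields and rec2csv helpers used above were removed in recent matplotlib releases. A rough sketch of the same query-and-collect idea using only the standard csv module and numpy (the function name and behaviour here are a simplified stand-in, not the original API):

import csv
import numpy as np

def extract_rows(path, query, headers):
    """Return rows of a tab-separated CSV whose columns match all key/value pairs in query."""
    rows = []
    with open(path, newline='') as f:
        for rec in csv.DictReader(f, delimiter='\t'):
            rec = {k.lower(): v for k, v in rec.items()}
            if all(str(rec.get(k.lower())) == str(v) for k, v in query.items()):
                rows.append(tuple(rec[h.lower()] for h in headers))
    if rows:
        return np.rec.fromrecords(rows, names=headers)
    return np.recarray(0, dtype=[(h, bool) for h in headers])
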
Example #52
0
    def test_matching(self):
        '''
        tests keypoints matching kernel
        '''
        image = scipy.misc.lena().astype(numpy.float32)
        try:
            import feature
        except ImportError:
            logger.error(
                "WARNING: feature module is not available to compare results with C++ implementation. Matching cannot be tested."
            )
            feature = None

        if (feature != None):
            #get the struct keypoints : (x,y,s,angle,[descriptors])
            sc = feature.SiftAlignment()
            ref_sift = sc.sift(image)
            ref_sift_2 = numpy.recarray((ref_sift.shape), dtype=ref_sift.dtype)
            ref_sift_2[:] = (ref_sift[::-1])
            t0_matching = time.time()
            siftmatch = feature.sift_match(ref_sift, ref_sift_2)
            t1_matching = time.time()
            ref = ref_sift.desc

            if (USE_CPU): wg = 1,
            else: wg = 64,
            shape = ref_sift.shape[0] * wg[0],

            ratio_th = numpy.float32(0.5329)  #sift.cpp : 0.73*0.73
            keypoints_start, keypoints_end = 0, min(ref_sift.shape[0],
                                                    ref_sift_2.shape[0])

            gpu_keypoints1 = pyopencl.array.to_device(queue, ref_sift)
            gpu_keypoints2 = pyopencl.array.to_device(queue, ref_sift_2)
            gpu_matchings = pyopencl.array.zeros(
                queue, (keypoints_end - keypoints_start, 2),
                dtype=numpy.int32,
                order="C")
            keypoints_start, keypoints_end = numpy.int32(
                keypoints_start), numpy.int32(keypoints_end)
            nb_keypoints = numpy.int32(10000)
            counter = pyopencl.array.zeros(queue, (1, 1),
                                           dtype=numpy.int32,
                                           order="C")

            t0 = time.time()
            k1 = self.program.matching(queue, shape, wg, gpu_keypoints1.data,
                                       gpu_keypoints2.data, gpu_matchings.data,
                                       counter.data, nb_keypoints, ratio_th,
                                       keypoints_end, keypoints_end)
            res = gpu_matchings.get()
            cnt = counter.get()
            t1 = time.time()

            #        ref_python, nb_match = my_matching(kp1, kp2, keypoints_start, keypoints_end)
            t2 = time.time()

            res_sort = res[numpy.argsort(res[:, 1])]
            #        ref_sort = ref[numpy.argsort(ref[:,1])]

            print(res[0:20])
            print("")
            #        print ref_sort[0:20]
            print("C++ Matching took %.3f ms" % (1000.0 *
                                                 (t1_matching - t0_matching)))
            print("OpenCL: %d match / C++ : %d match" %
                  (cnt, siftmatch.shape[0]))

            #sort to compare added keypoints
            '''
            delta = abs(res_sort-ref_sort).max()
            self.assert_(delta == 0, "delta=%s" % (delta)) #integers
            logger.info("delta=%s" % delta)
            '''

            if PROFILE:
                logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." %
                            (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
                logger.info("Matching took %.3fms" %
                            (1e-6 * (k1.profile.end - k1.profile.start)))
Example #53
0
    def load_data(self):
        """ Load the data from delirium file and make some check on it 

        The check are set thanks to the data_check dictionary attribute:
            'complete' : check if all data is there
            'sample' : check if the data is well sampled
            'nan' :  check for Nan Values 
            'glitches' :  check glitches 

        """
        ## load the raw data
        self.load_header()
        if self.f:
            self.log.notice("Loadding raw data of '%s'" % self.filename, 3)
            print("self.f", self.f)
            data = np.loadtxt(self.f,
                              comments="%",
                              dtype=self.params.get_dtypes())
        else:
            N = int((self.opl_max - self.opl_min) / self.opl_sampling + 1)
            data = np.zeros((N, ), dtype=self.params.get_dtypes())
            data['opl'] = np.linspace(self.opl_min, self.opl_max, N)
            self.data = data
            self.log.notice("Fake Delirium created with empy value", 3)
            return

        if not len(data):
            self.log.error("data is empty")
            raise DataError("data is empty")
        ## set the reverse flag for future hysteresis reference
        self.reverse = data["opl"][0] > data["opl"][-1]
        ## now sort the table by opl
        data.sort(order="opl")

        opl = data["opl"]
        opl_sampling = self.opl_sampling

        ## complete the data to the nominal range
        ## so that the table always has the right size
        if self.data_check["complete"]:
            self.log.notice("Check data completness", 3)
            first = self.opl2index(opl[0])

            addrowfirst = 0
            addrowend = 0
            if first > 0:
                self.log.warning(
                    "Missing data at beginning of scan, data start at OPL =%.2f"
                    % opl[0])
                addrowfirst += first
            last = self.opl2index(opl[-1])
            ## the maximum index from the opl_max distance
            imax = self.opl2index(self.opl_max)
            if last < imax:
                self.log.warning(
                    "Missing data at end of scan, data end at OPL =%.2f" %
                    opl[-1])
                addrowend += int(imax - last)

            if addrowfirst + addrowend:
                newdata = np.recarray(
                    (data.shape[0] + addrowfirst + addrowend, ),
                    dtype=data.dtype)

                if addrowend:
                    newdata[addrowfirst:-addrowend] = data
                else:
                    newdata[addrowfirst:] = data
                newdata[0:addrowfirst] = data[0]
                newdata["opl"][0:addrowfirst] = self.index2opl(
                    np.arange(first))

                if addrowend:
                    newdata[-addrowend:] = data[-1]
                    newdata["opl"][-addrowend:] = self.index2opl(
                        np.arange(last, imax))

                data = newdata

        #  Check that data are sampled at 0.375 m ± 3 cm OPL intervals.
        if self.data_check["sample"]:
            self.log.notice("Check data sampling", 3)

            diff = data["opl"][1:] - data["opl"][:-1]

            if (abs(data["opl"][0] - self.opl_min) > self.opl_tol):
                self.log.error('Incorrect range: min = %f m' %
                               (data["opl"][0]))
                raise DataError('Incorrect range: min = %f m' %
                                (data["opl"][0]))
            if (abs(data["opl"][-1] - self.opl_max) > self.opl_tol):
                self.log.error('Incorrect range: max = %f m' %
                               (data["opl"][-1]))
                raise DataError('Incorrect range: max = %f m' %
                                (data["opl"][-1]))
            if ((abs(diff) - self.opl_sampling) > self.opl_tol).any():
                self.log.error(
                    'Incorrect sampling, some does not follow %.3f+-%.3f ' %
                    (self.opl_sampling, self.opl_tol))
                raise DataError(
                    'Incorrect sampling, some does not follow %.3f+-%.3f ' %
                    (self.opl_sampling, self.opl_tol))

        mask = np.zeros(data.shape, dtype=bool)
        if self.data_check["nan"]:
            opl = data['opl']
            self.log.notice("Check NaN values", 3)
            for key in ["doms", "incl", "yctr", "zctr", "yend", "zend"]:
                v = data[key]
                Nv = len(v)
                test = np.isnan(v)
                ou = np.where(test)[0]
                Nou = len(ou)
                if not Nou: continue

                self.log.notice(
                    "Found %d NaN values for parameter %s" % (Nou, key), 1)

                for iou, i in enumerate(ou):
                    if (iou + 1) < Nou and ou[iou + 1] == i + 1:
                        self.log.error(
                            "At least two consecutive NaN values. Cannot fix delirium data"
                        )
                        raise DataError(
                            "At least two consecutive NaN values. Cannot fix delirium data"
                        )
                    if i == 0:
                        v[i] = v[i + 1]
                    elif i == (Nv - 1):
                        v[i] = v[i - 1]
                    else:
                        v[i] = np.interp(opl[i], opl[[i - 1, i + 1]],
                                         v[[i - 1, i + 1]])

                #mask += np.isnan(data[key])

        if self.data_check["glitches"]:
            self.log.notice("Checking glitches", 3)

            for key in ["incl", "yctr", "zctr", "yend", "zend"]:
                mask += self.filter_gliches(data[key])

        # :TODO: check what to do with invalid data
        # in matlab, invalid FOGALE data are replaced by extrapolation using a second order fit
        self.data = data
        ## close the file
        if self.f:
            self.f.close()
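
The NaN check above repairs isolated gaps by interpolating between the neighbouring samples on the opl axis. A compact sketch of that repair on a made-up column:

import numpy as np

opl = np.linspace(0.0, 3.0, 9)
v = np.array([1.0, 1.2, np.nan, 1.6, 1.8, np.nan, 2.2, 2.4, 2.6])

for i in np.where(np.isnan(v))[0]:
    if i == 0:
        v[i] = v[i + 1]
    elif i == len(v) - 1:
        v[i] = v[i - 1]
    else:
        v[i] = np.interp(opl[i], opl[[i - 1, i + 1]], v[[i - 1, i + 1]])

print(v)   # the two NaNs are replaced by 1.4 and 2.0
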
    def writeObs(self,
                 objId,
                 interpfuncs,
                 simdata,
                 idxObs,
                 outfileName='out.txt',
                 sedname='C.dat',
                 seeingCol='FWHMgeom',
                 expMJDCol='expMJD',
                 expTimeCol='visitExpTime'):
        """
        Call for each object; write out the observations of each object.
        """
        # Return if there's nothing to write out.
        if len(idxObs) == 0:
            return
        # Open file if needed.
        try:
            self.outfile
        except AttributeError:
            self._openOutput(outfileName)
        # Calculate the ephemerides for the object, using the interpfuncs, for the times in simdata[idxObs].
        tvis = simdata[expMJDCol][idxObs]
        ephs = np.recarray([len(tvis)],
                           dtype=([('delta', '<f8'), ('ra', '<f8'),
                                   ('dec', '<f8'), ('magV', '<f8'),
                                   ('time', '<f8'), ('dradt', '<f8'),
                                   ('ddecdt', '<f8'), ('phase', '<f8'),
                                   ('solarelon', '<f8'), ('velocity', '<f8')]))
        for n in interpfuncs:
            ephs[n] = interpfuncs[n](tvis)
        ephs['time'] = tvis
        # Calculate the extra columns we want to write out
        # (dmag due to color, trailing loss, and detection loss)
        # First calculate and match the color dmag term.
        dmagColor = np.zeros(len(idxObs), float)
        dmagColorDict = self._calcColors(sedname)
        filterlist = np.unique(simdata[idxObs]['filter'])
        for f in filterlist:
            if f not in dmagColorDict:
                raise UserWarning(
                    'Could not find filter %s in calculated colors!' % (f))
            match = np.where(simdata[idxObs]['filter'] == f)[0]
            dmagColor[match] = dmagColorDict[f]
        magFilter = ephs['magV'] + dmagColor
        # Calculate trailing and detection loses.
        dmagTrail, dmagDetect = self._calcMagLosses(
            ephs['velocity'], simdata[seeingCol][idxObs],
            simdata[expTimeCol][idxObs])
        # Turn into a recarray so it's easier below.
        dmags = np.rec.fromarrays(
            [magFilter, dmagColor, dmagTrail, dmagDetect],
            names=['magFilter', 'dmagColor', 'dmagTrail', 'dmagDetect'])

        outCols = [
            'objId',
        ] + list(ephs.dtype.names) + list(simdata.dtype.names) + list(
            dmags.dtype.names)

        if not self.wroteHeader:
            writestring = ''
            for col in outCols:
                writestring += '%s ' % (col)
            self.outfile.write('%s\n' % (writestring))
            self.wroteHeader = True

        # Write results.
        for eph, simdat, dm in zip(ephs, simdata[idxObs], dmags):
            writestring = '%s ' % (objId)
            for col in ephs.dtype.names:
                writestring += '%s ' % (eph[col])
            for col in simdat.dtype.names:
                writestring += '%s ' % (simdat[col])
            for col in dm.dtype.names:
                writestring += '%s ' % (dm[col])
            self.outfile.write('%s\n' % (writestring))
        self.outfile.flush()
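
np.rec.fromarrays, used above for the dmag columns, bundles parallel 1-D arrays into one recarray so that rows can be iterated and fields read by name. A two-row sketch with invented magnitudes:

import numpy as np

magFilter  = np.array([21.3, 22.1])
dmagColor  = np.array([0.05, 0.05])
dmagTrail  = np.array([0.10, 0.02])
dmagDetect = np.array([0.12, 0.03])

dmags = np.rec.fromarrays([magFilter, dmagColor, dmagTrail, dmagDetect],
                          names=['magFilter', 'dmagColor', 'dmagTrail', 'dmagDetect'])

for dm in dmags:                       # rows iterate like in writeObs above
    print(dm.magFilter, dm.dmagDetect)
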
Example #55
0
    def keypoints(self, image, mask=None):
        """Calculates the keypoints of the image

        TODO: use a temporary list with events and use a single test at the end

        :param image: ndimage of 2D (or 3D if RGB)
        :param mask: TODO: implement a mask for sieving out the keypoints
        :return: vector of keypoint (1D numpy array)
        """
        # self.reset_timer()
        with self.sem:
            total_size = 0
            keypoints = []
            descriptors = []
            assert image.shape[:2] == self.shape
            assert image.dtype in [self.dtype, numpy.float32]
            # old versions of pyopencl do not check for data contiguity
            if not (isinstance(image, pyopencl.array.Array)) and not (
                    image.flags["C_CONTIGUOUS"]):
                image = numpy.ascontiguousarray(image)
            t0 = time.time()

            if image.dtype == numpy.float32:
                if isinstance(image, pyopencl.array.Array):
                    evt = pyopencl.enqueue_copy(self.queue,
                                                self.cl_mem["scale_0"].data,
                                                image.data)
                else:
                    evt = pyopencl.enqueue_copy(self.queue,
                                                self.cl_mem["scale_0"].data,
                                                image)
                if self.profile:
                    self.events.append(("copy H->D", evt))
            elif self.dtype == numpy.float64:
                # A preprocessing kernel double_to_float exists, but is commented (RUNS ONLY ON GPU WITH FP64)
                # TODO: benchmark this kernel vs the current pure CPU format conversion with numpy.float32
                #       and uncomment it if it proves faster (dubious, because of data transfer bottleneck)
                evt = pyopencl.enqueue_copy(self.queue,
                                            self.cl_mem["scale_0"].data,
                                            image.astype(numpy.float32))
                if self.profile:
                    self.events.append(("copy H->D", evt))
            elif (len(image.shape) == 3) and (image.dtype
                                              == numpy.uint8) and (self.RGB):
                if isinstance(image, pyopencl.array.Array):
                    evt = pyopencl.enqueue_copy(self.queue,
                                                self.cl_mem["raw"].data,
                                                image.data)
                else:
                    evt = pyopencl.enqueue_copy(self.queue,
                                                self.cl_mem["raw"].data, image)
                if self.profile:
                    self.events.append(("copy H->D", evt))

                evt = self.kernels.get_kernel("rgb_to_float")(
                    self.queue, self.procsize[0], self.wgsize[0],
                    self.cl_mem["raw"].data, self.cl_mem["scale_0"].data,
                    *self.scales[0])
                if self.profile:
                    self.events.append(("RGB -> float", evt))

            elif self.dtype in self.converter:
                program = self.kernels.get_kernel(self.converter[self.dtype])
                evt = pyopencl.enqueue_copy(self.queue,
                                            self.cl_mem["raw"].data, image)
                if self.profile:
                    self.events.append(("copy H->D", evt))
                evt = program(self.queue, self.procsize[0], self.wgsize[0],
                              self.cl_mem["raw"].data,
                              self.cl_mem["scale_0"].data, *self.scales[0])
                if self.profile:
                    self.events.append(("convert -> float", evt))
            else:
                raise RuntimeError("invalid input format error (%s)" %
                                   (str(self.dtype)))

            wg1 = self.kernels_wg["max_min_global_stage1"]
            wg2 = self.kernels_wg["max_min_global_stage2"]
            if min(wg1, wg2) < self.red_size:
                # common bug on OSX when running on CPU
                logger.info(
                    "Unable to use MinMax Reduction: stage1 wg: %s; stage2 wg: %s < max_work_group_size: %s, expected: %s",
                    wg1, wg2, self.block_size, self.red_size)
                kernel = self.kernels.get_kernel("max_min_vec16")
                k = kernel(self.queue, (1, ), (1, ),
                           self.cl_mem["scale_0"].data,
                           numpy.int32(self.shape[0] * self.shape[1]),
                           self.cl_mem["max"].data, self.cl_mem["min"].data)
                if self.profile:
                    self.events.append(("max_min_serial", k))
                # python implementation:
                # buffer_ = self.cl_mem["scale_0"].get()
                # self.cl_mem["max"].set(numpy.array([buffer_.max()], dtype=numpy.float32))
                # self.cl_mem["min"].set(numpy.array([buffer_.min()], dtype=numpy.float32))
            else:
                kernel1 = self.kernels.get_kernel("max_min_global_stage1")
                kernel2 = self.kernels.get_kernel("max_min_global_stage2")
                # logger.debug("self.red_size: %s", self.red_size)
                shm = pyopencl.LocalMemory(self.red_size * 2 * 4)
                k1 = kernel1(self.queue, (self.red_size * self.red_size, ),
                             (self.red_size, ), self.cl_mem["scale_0"].data,
                             self.cl_mem["max_min"].data,
                             numpy.int32(self.shape[0] * self.shape[1]), shm)
                k2 = kernel2(self.queue, (self.red_size, ), (self.red_size, ),
                             self.cl_mem["max_min"].data,
                             self.cl_mem["max"].data, self.cl_mem["min"].data,
                             shm)

                if self.profile:
                    self.events.append(("max_min_stage1", k1))
                    self.events.append(("max_min_stage2", k2))

            evt = self.kernels.get_kernel("normalizes")(
                self.queue, self.procsize[0], self.wgsize[0],
                self.cl_mem["scale_0"].data, self.cl_mem["min"].data,
                self.cl_mem["max"].data, self.cl_mem["255"].data,
                *self.scales[0])
            if self.profile:
                self.events.append(("normalize", evt))

            curSigma = 1.0 if par.DoubleImSize else 0.5
            octave = 0
            if self._init_sigma > curSigma:
                logger.debug("Bluring image to achieve std: %f",
                             self._init_sigma)
                sigma = math.sqrt(self._init_sigma**2 - curSigma**2)
                self._gaussian_convolution(self.cl_mem["scale_0"],
                                           self.cl_mem["scale_0"], sigma, 0)

            for octave in range(self.octave_max):
                kp, descriptor = self._one_octave(octave)
                logger.info("in octave %i found %i kp" % (octave, kp.shape[0]))

                if len(kp):
                    # sieve out coordinates with NaNs
                    mask = numpy.where(
                        numpy.logical_not(numpy.isnan(kp.sum(axis=-1))))
                    keypoints.append(kp[mask])
                    descriptors.append(descriptor[mask])
                    total_size += len(mask[0])

            ########################################################################
            # Merge keypoints in central memory
            ########################################################################
            output = numpy.recarray(shape=(total_size, ), dtype=self.dtype_kp)
            last = 0
            for ds, desc in zip(keypoints, descriptors):
                l = ds.shape[0]
                if l > 0:
                    output[last:last + l].x = ds[:, 0]
                    output[last:last + l].y = ds[:, 1]
                    output[last:last + l].scale = ds[:, 2]
                    output[last:last + l].angle = ds[:, 3]
                    output[last:last + l].desc = desc
                    last += l
            logger.info("Execution time: %.3fms" % (1000 * (time.time() - t0)))
        return output
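
The merge step at the end copies each octave's keypoints into contiguous slices of one output recarray, field by field. A self-contained sketch of that merge with random stand-in data (the dtype mirrors the x/y/scale/angle/desc layout used above):

import numpy as np

dtype_kp = [('x', 'f4'), ('y', 'f4'), ('scale', 'f4'), ('angle', 'f4'),
            ('desc', 'u1', (128,))]

# pretend two octaves returned 3 and 2 keypoints respectively
kps = [np.random.rand(3, 4).astype('f4'), np.random.rand(2, 4).astype('f4')]
descs = [np.random.randint(0, 256, (3, 128)).astype('u1'),
         np.random.randint(0, 256, (2, 128)).astype('u1')]

output = np.recarray(sum(k.shape[0] for k in kps), dtype=dtype_kp)
last = 0
for kp, desc in zip(kps, descs):
    n = kp.shape[0]
    output[last:last + n].x = kp[:, 0]
    output[last:last + n].y = kp[:, 1]
    output[last:last + n].scale = kp[:, 2]
    output[last:last + n].angle = kp[:, 3]
    output[last:last + n].desc = desc
    last += n

print(output.shape, output.desc.shape)   # (5,), (5, 128)
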
Example #56
0
def read_data(args, work, limit_bands=None, prefix='piff'):
    import fitsio

    RESERVED = 64
    BAD_CCDS = [2, 31, 61]

    if args.file != '':
        print('Read file ',args.file)
        with open(args.file) as fin:
            exps = [ line.strip() for line in fin if line[0] != '#' ]
        print('File included %d exposures'%len(exps))
    else:
        exps = args.exps
        print('Explicit listing of %d exposures'%len(exps))
    exps = sorted(exps)

    keys = ['ra', 'dec', 'x', 'y', 'mag', 'obs_e1', 'obs_e2', 'obs_T',
            prefix+'_e1', prefix+'_e2', prefix+'_T']
    all_data = { key : [] for key in keys }
    all_keys = keys

    all_data['exp'] = []
    all_data['ccd'] = []
    all_keys = all_keys + ['exp', 'ccd' ]

    if 'x' in keys:
        all_data['fov_x'] = []
        all_data['fov_y'] = []
        all_keys = all_keys + ['fov_x', 'fov_y']

    inkeys = keys

    all_bands = []  # This keeps track of the band for each record
    #all_tilings = []  # This keeps track of the tiling for each record
    bands = set()   # This is the set of all bands being used
    #tilings = set()   # This is the set of all tilings being used

    for exp in exps:

        print('Start work on exp = ',exp)
        expnum = int(exp)
        print('expnum = ',expnum)
        expinfo = fitsio.read(os.path.join(work, exp, 'exp_info_%d.fits'%expnum))

        if expnum not in expinfo['expnum']:
            print('expnum is not in expinfo!')
            print('expinfo[expnum] = ',expinfo['expnum'])
            print('Could not find information about this expnum.  Skipping ', exp)
            continue
        i = np.nonzero(expinfo['expnum'] == expnum)[0][0]
        #print('i = ',i)
        band = expinfo['band'][i]
        #print('band[k] = ',band)
        if (limit_bands is not None) and (band not in limit_bands):
            print('Not doing band = %s.'%band)
            continue

        #tiling = int(expinfo['tiling'][k])
        #print('tiling[k] = ',tiling)

        #if tiling == 0:
            # This shouldn't happen, but it did for a few exposures.  Just skip them, since this
            # might indicate some kind of problem.
            #print('tiling == 0.  Skip this exposure.')
            #continue

        #if tiling > args.max_tiling:
            #print('tiling is > %d.  Skip this exposure.'%args.max_tiling)
            #continue

        for k in range(len(expinfo)):
            ccdnum = expinfo[k]['ccdnum']
            if expinfo[k]['flag'] != 0:
                print('Skipping ccd %d because it is blacklisted: '%ccdnum, expinfo[k]['flag'])
                continue
            if ccdnum in BAD_CCDS:
                print('Skipping ccd %d because it is BAD'%ccdnum)
                continue

            cat_file = os.path.join(work, exp, "psf_cat_%d_%d.fits"%(expnum,ccdnum))
            #print('cat_file = ',cat_file)
            try:
                data = fitsio.read(cat_file)
                flag = data[prefix+'_flag']
            except (OSError, IOError):
                print('Unable to open cat_file %s.  Skipping this file.'%cat_file)
                continue

            ntot = len(data)
            nused = np.sum((flag & 1) != 0)
            nreserved = np.sum((flag & RESERVED) != 0)
            ngood = np.sum(flag == 0)
            #print('nused = ',nused)
            #print('nreserved = ',nreserved)
            #print('ngood = ',ngood)

            if args.use_reserved:
                mask = (flag == RESERVED) | (flag == RESERVED+1)
            else:
                mask = (flag == 0)
            #print('mask = ',mask)

            T = data['obs_T']
            dT = (data[prefix + '_T'] - data['obs_T'])
            de1 = (data[prefix + '_e1'] - data['obs_e1'])
            de2 = (data[prefix + '_e2'] - data['obs_e2'])
            used = (flag == 0)
            #if np.std(dT[used]/T[used]) > 0.03:
                #continue
            #if np.std(de1[used]) > 0.02:
                #continue
            #if np.std(de2[used]) > 0.02:
                #continue
            good = (abs(dT/data['obs_T']) < 0.1) & (abs(de1) < 0.1) & (abs(de2) < 0.1)
            mask = mask & good
 
            ngood = np.sum(mask)
            #print('ngood = ',ngood,'/',len(data))
            assert ngood == len(data[mask])
            if ngood == 0:
                print('All objects in ccd %d are flagged.'%ccdnum)
                print('Probably due to astrometry flags. Skip this CCD.')
                continue

            for key, inkey in zip(keys, inkeys):
                all_data[key].append(data[inkey][mask])

            all_data['exp'].append([expnum] * ngood)
            all_data['ccd'].append([ccdnum] * ngood)

            if 'x' in keys:
                # Convert to focal position.
                x,y = toFocal(ccdnum, data['x'][mask], data['y'][mask])
                # This comes back in units of mm.  Convert to arcsec.
                # 1 pixel = 15e-3 mm = 0.263 arcsec
                x *= 0.263/15e-3
                y *= 0.263/15e-3
                all_data['fov_x'].append(x)
                all_data['fov_y'].append(y)

            all_bands.extend( ([band] * ngood) )
            #all_tilings.extend( ([tiling] * ngood) )
            bands.add(band)
            #tilings.add(tiling)

    print('\nFinished processing all exposures')
    print('bands = ',bands)
    #print('tilings = ',tilings)

    # Turn the data into a recarray
    print('all_data.keys = ',all_data.keys())
    # One format per output column: floats for the data keys, one character for 'band'.
    formats = ['f8'] * len(all_keys) + ['a1']
    #names = all_keys + ['band', 'tiling']
    names = all_keys + ['band']
    data = np.recarray(shape = (len(all_bands),),
                          formats = formats, names = names)
    print('data.dtype = ',data.dtype)
    for key in all_keys:
        data[key] = np.concatenate(all_data[key])
    data['band'] = all_bands
    #data['tiling'] = all_tilings
    print('made recarray')

    tilings = None
    return data, bands, tilings
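
# Hypothetical usage sketch (the args object, the work directory layout and the band
# selection are assumptions implied by the signature above):
#
#   data, bands, tilings = read_data(args, work='./work', limit_bands='riz', prefix='piff')
#   dT_frac = (data['piff_T'] - data['obs_T']) / data['obs_T']
#   print('bands:', bands, ' mean |dT/T|:', np.mean(np.abs(dT_frac)))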
Example #57
0
def unwrap_specobjid(specObjID, run2d_integer=False, specLineIndex=False):
    """Unwrap CAS-style specObjID into plate, fiber, mjd, run2d.

    See :func:`~pydl.pydlutils.sdss.sdss_specobjid` for details on how the
    bits within a specObjID are assigned.

    Parameters
    ----------
    specObjID : :class:`numpy.ndarray`
        An array containing 64-bit integers or strings.  If strings are passed,
        they will be converted to integers internally.
    run2d_integer : :class:`bool`, optional
        If ``True``, do *not* attempt to convert the encoded run2d values
        to a string of the form 'vN_M_P'.
    specLineIndex : :class:`bool`, optional
        If ``True`` interpret any low-order bits as being an 'index'
        rather than a 'line'.

    Returns
    -------
    :class:`numpy.recarray`
        A record array with the same length as `specObjID`, with the columns
        'plate', 'fiber', 'mjd', 'run2d' and 'line' (or 'index' if
        `specLineIndex` is ``True``).

    Examples
    --------
    >>> from numpy import array, uint64
    >>> from pydl.pydlutils.sdss import unwrap_specobjid
    >>> unwrap_specobjid(array([4565636362342690816], dtype=uint64))
    rec.array([(4055, 408, 55359, 'v5_7_0', 0)],
              dtype=[('plate', '<i4'), ('fiber', '<i4'), ('mjd', '<i4'), ('run2d', '<U8'), ('line', '<i4')])

    """
    if (specObjID.dtype.type is np.string_
            or specObjID.dtype.type is np.unicode_):
        tempobjid = specObjID.astype(np.uint64)
    elif specObjID.dtype.type is np.uint64:
        tempobjid = specObjID.copy()
    else:
        raise ValueError('Unrecognized type for specObjID!')
    run2d_dtype = 'U8'
    if run2d_integer:
        run2d_dtype = 'i4'
    line = 'line'
    if specLineIndex:
        line = 'index'
    unwrap = np.recarray(specObjID.shape,
                         dtype=[('plate', 'i4'), ('fiber', 'i4'),
                                ('mjd', 'i4'), ('run2d', run2d_dtype),
                                (line, 'i4')])
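    # Bit layout decoded below: plate occupies bits 50-63, fiber bits 38-49,
    # MJD (offset by 50000) bits 24-37, run2d bits 10-23, and the line/index
    # number the lowest 10 bits.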
    unwrap.plate = np.bitwise_and(tempobjid >> 50, 2**14 - 1)
    unwrap.fiber = np.bitwise_and(tempobjid >> 38, 2**12 - 1)
    unwrap.mjd = np.bitwise_and(tempobjid >> 24, 2**14 - 1) + 50000
    run2d = np.bitwise_and(tempobjid >> 10, 2**14 - 1)
    if run2d_integer:
        unwrap.run2d = run2d
    else:
        N = ((run2d // 10000) + 5).tolist()
        M = ((run2d % 10000) // 100).tolist()
        P = (run2d % 100).tolist()
        unwrap.run2d = [
            'v{0:d}_{1:d}_{2:d}'.format(n, m, p) for n, m, p in zip(N, M, P)
        ]
    unwrap[line] = np.bitwise_and(tempobjid, 2**10 - 1)
    return unwrap
Example #58
0
def ReadMagData(Date, Minute=False, res=None, Ab=None, DetectGaps=None):
    '''
    Reads binary magnetometer data from MESSENGER.

    Args:
        Date:       32-bit (minimum) integer with the date in the format yyyymmdd.
        Minute:     If True, the routine reads minute averages of the MAG data;
                    if False, full resolution data are read.
        res:        Resample resolution for the data in seconds. By default
                    res=None (no resampling); res=0.05 gives evenly spaced
                    20 Hz sampling.
        Ab:         Angle to aberrate the X and Y components of the data by,
                    in degrees. When set to None, the aberration angle is
                    found automatically.
        DetectGaps: Largest data gap size (in seconds) to interpolate over.
                    If DetectGaps=None, all gaps are interpolated over;
                    otherwise larger gaps are filled with NaN.

    Returns:
        np.recarray of MAG data

    '''
    fname = '{:08d}.bin'.format(Date)
    if Minute:
        path = Globals.MessPath + 'MAG/Binary/MSO/Minute/'
    else:
        path = Globals.MessPath + 'MAG/Binary/MSO/Full/'

    dtype = [('Date', 'int32'), ('ut', 'float32'), ('Xmso', 'float32'),
             ('Ymso', 'float32'), ('Zmso', 'float32'), ('Xmsm', 'float32'),
             ('Ymsm', 'float32'), ('Zmsm', 'float32'), ('Bx', 'float32'),
             ('By', 'float32'), ('Bz', 'float32')]

    if not os.path.isfile(path + fname):
        out = np.recarray(0, dtype=dtype)
        return out

    data = RT.ReadRecarray(path + fname, dtype)

    if Ab is None:
        tmp = GetAberrationAngle(Date)
        Ab = tmp.Angle

    if Ab != 0.0:
        #rotate spacecraft position into aberrated coords
        data.Xmsm, data.Ymsm = RotTrans(data.Xmsm, data.Ymsm,
                                        Ab * np.pi / 180.0)
        data.Xmso, data.Ymso = RotTrans(data.Xmso, data.Ymso,
                                        Ab * np.pi / 180.0)
        #rotate bx,by into aberrated coordinate system
        data.Bx, data.By = RotTrans(data.Bx, data.By, Ab * np.pi / 180.0)

    if res is not None:
        UTo = np.array(data.ut)

        length = np.int32(86400 / res)
        newdata = np.recarray(length, dtype=dtype)

        ntags = np.size(data.dtype.names)
        newdata.ut = 24 * np.arange(length, dtype='float32') / length
        newdata.Date = Date
        for i in range(2, ntags):
            f = InterpolatedUnivariateSpline(data.ut,
                                             data[data.dtype.names[i]])
            newdata[newdata.dtype.names[i]] = f(newdata.ut)

        if DetectGaps is not None:
            #convert the maximum allowed gap from seconds (5s is used elsewhere) to hours, to match ut
            MaxUTGapHr = DetectGaps / 3600.0
            bad = np.zeros(length, dtype='bool')
            for i in range(0, UTo.size - 1):
                if (UTo[i + 1] - UTo[i]) > MaxUTGapHr:
                    b = np.where((newdata.ut > UTo[i])
                                 & (newdata.ut < UTo[i + 1]))[0]
                    bad[b] = True

            baddata = np.where(bad)[0]
            dtags = ['Bx', 'By', 'Bz']
            for tag in dtags:
                if tag in data.dtype.names:
                    newdata[tag][baddata] = np.float32(np.nan)

        return newdata
    else:
        return data
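
# Hypothetical usage sketch (date, resampling and gap settings chosen only for illustration):
#
#   mag = ReadMagData(20120101, Minute=False, res=0.05, Ab=None, DetectGaps=10.0)
#   if mag.size > 0:
#       print(mag.ut[:5], mag.Bx[:5], mag.By[:5], mag.Bz[:5])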
Example #59
0
    df = pd.DataFrame(a, columns=['date', 'val', 'character_col'])
    # Convert into numpy recarray to preserve the dtypes
    np_array = df.to_records(index=False)
    del df
    shape, dtype = np_array.shape, np_array.dtype
    print(f"np_array's size={np_array.nbytes/1e6}MB")

    # With shared memory
    # Start tracking memory usage
    tracemalloc.start()
    start_time = time.time()
    with SharedMemoryManager() as smm:
        # Create a shared memory block of size np_array.nbytes
        shm = smm.SharedMemory(np_array.nbytes)
        # Create a np.recarray using the buffer of shm
        shm_np_array = np.recarray(shape=shape, dtype=dtype, buf=shm.buf)
        # Copy the data into the shared memory
        np.copyto(shm_np_array, np_array)
        # Spawn some processes to do some work
        with ProcessPoolExecutor(cpu_count()) as exe:
            fs = [exe.submit(work_with_shared_memory, shm.name, shape, dtype)
                  for _ in range(cpu_count())]
            for _ in as_completed(fs):
                pass
    # Check memory usage
    current, peak = tracemalloc.get_traced_memory()
    print(f"Current memory usage {current/1e6}MB; Peak: {peak/1e6}MB")
    print(f'Time elapsed: {time.time()-start_time:.2f}s')
    tracemalloc.stop()

    # Without shared memory
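
# The worker submitted to the pool above is not part of this snippet. A minimal sketch,
# assuming it re-attaches to the shared block by name and builds a zero-copy recarray
# view on its buffer (the 'val' column comes from the original DataFrame):
import numpy as np
from multiprocessing import shared_memory

def work_with_shared_memory(shm_name, shape, dtype):
    # Attach to the existing shared memory block created by the SharedMemoryManager
    existing_shm = shared_memory.SharedMemory(name=shm_name)
    # Rebuild the recarray as a view on the shared buffer (no data is copied)
    np_array = np.recarray(shape=shape, dtype=dtype, buf=existing_shm.buf)
    # Example work: sum the numeric 'val' column
    total = np_array.val.sum()
    existing_shm.close()
    return total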
Example #60
0
    def test_ichimoku_kinko_hyo(self):
        window_length = 52
        today = pd.Timestamp('2014', tz='utc')
        nassets = 5
        assets = pd.Index(np.arange(nassets))
        days_col = np.arange(window_length)[:, np.newaxis]
        highs = np.arange(nassets) + 2 + days_col
        closes = np.arange(nassets) + 1 + days_col
        lows = np.arange(nassets) + days_col

        tenkan_sen_length = 9
        kijun_sen_length = 26
        chikou_span_length = 26
        ichimoku_kinko_hyo = IchimokuKinkoHyo(
            window_length=window_length,
            tenkan_sen_length=tenkan_sen_length,
            kijun_sen_length=kijun_sen_length,
            chikou_span_length=chikou_span_length,
        )

        dtype = [
            ('tenkan_sen', 'f8'),
            ('kijun_sen', 'f8'),
            ('senkou_span_a', 'f8'),
            ('senkou_span_b', 'f8'),
            ('chikou_span', 'f8'),
        ]
        out = np.recarray(
            shape=(nassets,),
            dtype=dtype,
            buf=np.empty(shape=(nassets,), dtype=dtype),
        )
        ichimoku_kinko_hyo.compute(
            today,
            assets,
            out,
            highs,
            lows,
            closes,
            tenkan_sen_length,
            kijun_sen_length,
            chikou_span_length,
        )

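        # Sanity check on the expected values below: with highs[d, i] = i + 2 + d,
        # lows[d, i] = i + d and closes[d, i] = i + 1 + d over the 52-day window,
        # the standard Ichimoku definitions reduce to simple midpoints:
        #   tenkan_sen    = (9-day highest high + 9-day lowest low) / 2
        #   kijun_sen     = (26-day highest high + 26-day lowest low) / 2
        #   senkou_span_a = (tenkan_sen + kijun_sen) / 2
        #   senkou_span_b = (52-day highest high + 52-day lowest low) / 2
        #   chikou_span   = the close chikou_span_length rows back (closes[-26])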
        expected_tenkan_sen = np.array([
            (53 + 43) / 2,
            (54 + 44) / 2,
            (55 + 45) / 2,
            (56 + 46) / 2,
            (57 + 47) / 2,
        ])
        expected_kijun_sen = np.array([
            (53 + 26) / 2,
            (54 + 27) / 2,
            (55 + 28) / 2,
            (56 + 29) / 2,
            (57 + 30) / 2,
        ])
        expected_senkou_span_a = (expected_tenkan_sen + expected_kijun_sen) / 2
        expected_senkou_span_b = np.array([
            (53 + 0) / 2,
            (54 + 1) / 2,
            (55 + 2) / 2,
            (56 + 3) / 2,
            (57 + 4) / 2,
        ])
        expected_chikou_span = np.array([
            27.0,
            28.0,
            29.0,
            30.0,
            31.0,
        ])

        assert_equal(
            out.tenkan_sen,
            expected_tenkan_sen,
            msg='tenkan_sen',
        )
        assert_equal(
            out.kijun_sen,
            expected_kijun_sen,
            msg='kijun_sen',
        )
        assert_equal(
            out.senkou_span_a,
            expected_senkou_span_a,
            msg='senkou_span_a',
        )
        assert_equal(
            out.senkou_span_b,
            expected_senkou_span_b,
            msg='senkou_span_b',
        )
        assert_equal(
            out.chikou_span,
            expected_chikou_span,
            msg='chikou_span',
        )