def events(self):
    if self.__events is None:
        # the indices of the channels within this cluster
        cidx = [channel.index() for channel in self]
        # the eventdata from where to extract
        data = self.__eventdata._data
        # select all events for this cluster
        eventmask = data['clid'] == self.__index
        # always keep the first and last frame
        eventmask[0] = True
        eventmask[-1] = True
        # create the recarray that stores the events of this cluster
        if self.__model == dyk:
            self.__events = numpy.recarray(
                shape=eventmask.sum(),
                dtype=[('t', '<f8'), ('noch', '<i2'), ('chid', int),
                       ('states', '|i1', (len(cidx), 8))])
        elif self.__model == deterministic:
            self.__events = numpy.recarray(
                shape=eventmask.sum(),
                dtype=[('t', '<f8'), ('noch', '<i2'), ('chid', int),
                       ('states', bool, (len(cidx),))])
        # copy time, chid and the subspace of the state column to the new recarray
        self.__events['t'] = data[eventmask]['t']
        self.__events['chid'] = data[eventmask]['chid']
        self.__events['states'] = data[eventmask]['states'][:, self.__index, ...]
        # cache the number of open channels
        model = self.__eventdata.model()
        self.__events['noch'] = model.open(self.__events).sum(-1)
    return self.__events
def selectOnSharpeRatio(self, ls_symbols, top_n_equities=10):
    '''Choose the best portfolio over the stock universe, according to their Sharpe ratio.'''
    # TODO: change this to a DataAccess utility --------------
    symbols, files = getAllFromCSV()
    datalength = len(recfromcsv(files[0])['close'])
    print('Datalength: {}'.format(datalength))
    # ---------------------------------------------------------
    # Initializing data arrays
    closes = np.recarray((datalength,), dtype=[(symbol, 'float') for symbol in symbols])
    daily_ret = np.recarray((datalength - 1,), dtype=[(symbol, 'float') for symbol in symbols])
    average_returns = np.zeros(len(files))
    return_stdev = np.zeros(len(files))
    sharpe_ratios = np.zeros(len(files))
    cumulative_returns = np.recarray((datalength - 1,), dtype=[(symbol, 'float') for symbol in symbols])

    # Here is the meat
    # TODO: data = dataobj.getData(ls_symbols)
    for i, symbol in enumerate(ls_symbols):
        if len(data) != datalength:
            continue
        print('Processing {}'.format(symbol))
        closes[symbols[i]] = data['close'][::-1]
        daily_ret[symbols[i]] = dailyReturns()
        # We now can compute:
        average_returns[i] = daily_ret[symbols[i]].mean()
        return_stdev[i] = daily_ret[symbols[i]].std()
        sharpe_ratios[i] = (average_returns[i] / return_stdev[i]) * np.sqrt(datalength)  # compare to course
        print('\tavg: {}, stdev: {}, sharpe ratio: {}'.format(
            average_returns[i], return_stdev[i], sharpe_ratios[i]))

    sorted_sharpe_indices = np.argsort(sharpe_ratios)[::-1][0:top_n_equities]
    # TODO: return a dict as {symbol: sharpe_ratio}, or a df with all 3 components
    return sorted_sharpe_indices
def __init__(self, analyzer):
    n = analyzer.frames_in_flight
    fw, fh = analyzer.camera.frame_size
    c = analyzer.camera.channels
    rw, rh = analyzer.rectifier.image_size

    self.frames = numpy.recarray(n, [
        ("index", "u4"),
        ("timestamp", "f8"),
        ("image", "u1", (fh, fw, c)),
        ("image_f", "f4", (fh, fw, c)),
        ("table", analyzer.table_tracker.dtype),
        ("rectification", "f4", (rh, rw, c)),
        ("background", analyzer.background_analyzer.dtype),
        ("team_foosmen", analyzer.team_foosmen_analyzer.dtype, len(analyzer.table.teams)),
        ("ball", analyzer.ball_analyzer.dtype),
        ("rod", [
            ("%s_%s" % (rod.type.name, rod.team.name), analyzer.rod_analyzer[i].dtype)
            for i, rod in enumerate(analyzer.table.rods)
        ])
    ])

    self.background = numpy.recarray((), [
        ("color_mean", "f4", (rh, rw, c)),
        ("variance", "f8", (c, c)),
        ("q_estimation", "f4", (rh, rw))
    ])

    self.team_foosmen = numpy.recarray(len(analyzer.table.teams), [
        ("color_mean", "f4", (rh, rw, c)),
        ("variance", "f8", (c, c)),
    ])
def events(self):
    '''Return a recarray with all events of this channel.'''
    if self.__events is None:
        # the event data from where to extract
        data = self.__eventdata._data
        # select all events for this channel
        eventmask = data['chid'] == self.__index
        # if there are no events, return an empty recarray
        if len(data) == 0:
            self.__events = numpy.recarray(
                shape=0, dtype=[('t', '<f8'), ('states', self.__model.state_type())])
            return self.__events
        # always keep the first and last frame
        eventmask[0] = True
        eventmask[-1] = True
        # create the recarray that stores the events of this channel
        # for DYK this used to be: dtype=[('t', '<f8'), ('states', '|i1', 8)]
        self.__events = numpy.recarray(
            shape=eventmask.sum(),
            dtype=[('t', '<f8'), ('states', self.__model.state_type())])
        # copy the time and the subspace of the state column to the new recarray
        self.__events['t'] = data[eventmask]['t']
        # for DYK this used to be: data[eventmask]['states'][:, self.__index, :]
        self.__events['states'] = data[eventmask]['states'][:, self.clusterindex(), self.__index]
    return self.__events
def test_multiple_args_records(self):
    pyfunc = foobar
    mystruct_dt = np.dtype([('p', np.float64),
                            ('row', np.float64),
                            ('col', np.float64)])
    mystruct = numpy_support.from_dtype(mystruct_dt)
    cres = compile_isolated(pyfunc,
                            [mystruct[:], types.uint64, types.uint64],
                            return_type=mystruct[:])
    cfunc = cres.entry_point

    st1 = np.recarray(3, dtype=mystruct_dt)
    st2 = np.recarray(3, dtype=mystruct_dt)

    st1.p = np.arange(st1.size) + 1
    st1.row = np.arange(st1.size) + 1
    st1.col = np.arange(st1.size) + 1

    st2.p = np.arange(st2.size) + 1
    st2.row = np.arange(st2.size) + 1
    st2.col = np.arange(st2.size) + 1

    test_fail_args = ((st1, -1, st2), (st1, st2, -1))

    # OverflowError on Python >= 2.7, TypeError on 2.6
    exc_type = OverflowError if sys.version_info >= (2, 7) else TypeError
    # check each failing argument set individually
    for a, b, c in test_fail_args:
        with self.assertRaises(exc_type):
            cfunc(a, b, c)
def _get_subheaders(self):
    """Retrieve all subheaders and return a list of subheader recarrays."""
    subheaders = []
    header = self._header
    endianness = self.endianness
    dt = self._subhdrdtype
    if self.endianness is not native_code:
        dt = self._subhdrdtype.newbyteorder(self.endianness)
    if self._header['num_frames'] > 1:
        for item in self._mlist._mlist:
            if item[1] == 0:
                break
            self.fileobj.seek(0)
            offset = (int(item[1]) - 1) * 512
            self.fileobj.seek(offset)
            tmpdat = self.fileobj.read(512)
            sh = np.recarray(shape=(), dtype=dt, buf=tmpdat)
            subheaders.append(sh.copy())
    else:
        self.fileobj.seek(0)
        offset = (int(self._mlist._mlist[0][1]) - 1) * 512
        self.fileobj.seek(offset)
        tmpdat = self.fileobj.read(512)
        sh = np.recarray(shape=(), dtype=dt, buf=tmpdat)
        subheaders.append(sh)
    return subheaders
def test_structured_arrays(self):
    def check(arr, dtype, ndim, layout, aligned):
        ty = typeof(arr)
        self.assertIsInstance(ty, types.Array)
        self.assertEqual(ty.dtype, dtype)
        self.assertEqual(ty.ndim, ndim)
        self.assertEqual(ty.layout, layout)
        self.assertEqual(ty.aligned, aligned)

    dtype = np.dtype([('m', np.int32), ('n', 'S5')])
    rec_ty = numpy_support.from_struct_dtype(dtype)

    arr = np.empty(4, dtype=dtype)
    check(arr, rec_ty, 1, "C", False)
    arr = np.recarray(4, dtype=dtype)
    check(arr, rec_ty, 1, "C", False)

    dtype = np.dtype([('m', np.int32), ('n', 'S5')], align=True)
    rec_ty = numpy_support.from_struct_dtype(dtype)
    # On Numpy 1.6, align=True doesn't align the itemsize
    actual_aligned = numpy_support.version >= (1, 7)

    arr = np.empty(4, dtype=dtype)
    check(arr, rec_ty, 1, "C", actual_aligned)
    arr = np.recarray(4, dtype=dtype)
    check(arr, rec_ty, 1, "C", actual_aligned)
def Xi2_line_ratios(obs_ratios, arxvPDR):
    '''Computes the Xi2 statistic given the observed lines and a PDR arxv.'''
    allData = numpy.recarray([], [('x', 'f8'), ('y', 'f8'), ('z', 'f8'), ('t', 'f8'), ('v', 'f8'), ])
    models = {}
    specStrs, codes = obs_ratios.species_and_codes()

    # collecting all the line intensities of the ratios involved in the observations (obs_ratio)
    # from the model database, processing one Av at a time...
    for i, AvStr in enumerate(arxvPDR.radexDbs):
        Av = numpy.float64(AvStr)

        # array which will hold the grid points and the values for this Av
        data = numpy.recarray((arxvPDR.nMeshes), allData.dtype.descr)

        # getting the emissions for each line from the PDR database for all the models for the current Av
        for code in codes:
            models[code] = 10.0**arxvPDR.get_emissions_from_databases(
                line={'type': 'radex-lvg', 'code': code}, Av_use=Av)

        # defining the array which will hold the Xi2 for all the models for this Av
        Xi2 = numpy.zeros(arxvPDR.nMeshes, 'f8')

        # compute the Xi2
        for obs_ratio in obs_ratios:
            # the line codes involved in this ratio
            code1, code2 = codes_from_ratio(obs_ratio)

            # the ratios for all the models at this Av for this particular line ratio
            model_ratio = models[code1] / models[code2]

            # computing the Xi2
            f_o = obs_ratios[obs_ratio]['v']
            f_e = obs_ratios[obs_ratio]['e']
            f_m = model_ratio
            Xi2 += ((f_m - f_o) / f_e)**2.0

        data.x = arxvPDR.grid_x
        data.y = arxvPDR.grid_y
        data.z = arxvPDR.grid_z
        data.t = Av
        data.v = Xi2

        allData = numpy.hstack((allData, data))

    # removing the first entry (redundant ;;; .. todo:: fix this [low priority])
    allData = allData[1::]

    # filtering out the points which have NaNs
    inds_not_nan = numpy.where(numpy.isfinite(allData['v']))
    return allData[inds_not_nan]
def test_usecase1(self):
    pyfunc = usecase1

    # This is an unaligned dtype
    mystruct_dt = numpy.dtype([('p', numpy.float64),
                               ('row', numpy.float64),
                               ('col', numpy.float64)])
    mystruct = numpy_support.from_dtype(mystruct_dt)

    cres = compile_isolated(pyfunc, (mystruct[:], mystruct[:]))
    cfunc = cres.entry_point

    st1 = numpy.recarray(3, dtype=mystruct_dt)
    st2 = numpy.recarray(3, dtype=mystruct_dt)

    st1.p = numpy.arange(st1.size) + 1
    st1.row = numpy.arange(st1.size) + 1
    st1.col = numpy.arange(st1.size) + 1

    st2.p = numpy.arange(st2.size) + 1
    st2.row = numpy.arange(st2.size) + 1
    st2.col = numpy.arange(st2.size) + 1

    expect1 = st1.copy()
    expect2 = st2.copy()

    got1 = expect1.copy()
    got2 = expect2.copy()

    pyfunc(expect1, expect2)
    cfunc(got1, got2)

    self.assertTrue(numpy.all(expect1 == got1))
    self.assertTrue(numpy.all(expect2 == got2))
def addfield(mrecord, newfield, newfieldname=None):
    """Adds a new field to the masked record array.

    Uses `newfield` as data and `newfieldname` as name. If `newfieldname`
    is None, the new field name is set to 'fi', where `i` is the number of
    existing fields.
    """
    _data = mrecord._data
    _mask = mrecord._mask
    if newfieldname is None or newfieldname in reserved_fields:
        newfieldname = 'f%i' % len(_data.dtype)
    newfield = ma.array(newfield)
    # Get the new data ............
    # Create a new empty recarray
    newdtype = np.dtype(_data.dtype.descr + [(newfieldname, newfield.dtype)])
    newdata = recarray(_data.shape, newdtype)
    # Add the existing fields
    [newdata.setfield(_data.getfield(*f), *f) for f in _data.dtype.fields.values()]
    # Add the new field
    newdata.setfield(newfield._data, *newdata.dtype.fields[newfieldname])
    newdata = newdata.view(MaskedRecords)
    # Get the new mask .............
    # Create a new empty recarray
    newmdtype = np.dtype([(n, bool_) for n in newdtype.names])
    newmask = recarray(_data.shape, newmdtype)
    # Add the old masks
    [newmask.setfield(_mask.getfield(*f), *f) for f in _mask.dtype.fields.values()]
    # Add the mask of the new field
    newmask.setfield(getmaskarray(newfield), *newmask.dtype.fields[newfieldname])
    newdata._mask = newmask
    return newdata
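# A minimal usage sketch for addfield above (not part of the original source).
# It assumes addfield is in scope together with the numpy.ma.mrecords machinery
# it references (MaskedRecords, recarray, getmaskarray, ...); the field names
# 'a', 'b', 'c' are made up for illustration.
import numpy as np
import numpy.ma as ma
from numpy.ma.mrecords import fromarrays

base = fromarrays([np.array([1, 2, 3]), np.array([4.0, 5.0, 6.0])],
                  names=['a', 'b'])                  # two-field masked record array
extra = ma.array([7.0, 8.0, 9.0], mask=[0, 1, 0])    # new column with one masked value
extended = addfield(base, extra, newfieldname='c')
print(extended.dtype)   # now contains fields 'a', 'b' and 'c'
print(extended.c)       # the mask of the new column is carried along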
def test_add_data_then_read(self):
    data0 = np.recarray((1,), dtype=[("f0", "<f8"), ("f1", "<f8"), ("f2", "<f8")])
    data0[0] = (1, 2, 3)
    data1 = np.recarray((1,), dtype=[("f0", "<f8"), ("f1", "<f8"), ("f2", "<f8")])
    data1[0] = (4, 5, 6)
    self.data.addData(data0)
    self.data.addData(data1)
    self.assert_data_in_backend(self.data, [[1, 2, 3], [4, 5, 6]])
def create_neurohdf_file(filename, data): with closing(h5py.File(filename, 'w')) as hfile: hfile.attrs['neurohdf_version'] = '0.1' mcgroup = hfile.create_group("Microcircuit") mcgroup.attrs['node_type'] = 'irregular_dataset' vert = mcgroup.create_group("vertices") conn = mcgroup.create_group("connectivity") vert.create_dataset("id", data=data['vert']['id']) vert.create_dataset("location", data=data['vert']['location']) verttype=vert.create_dataset("type", data=data['vert']['type']) # create rec array with two columns, value and name my_dtype = np.dtype([('value', 'l'), ('name', h5py.new_vlen(str))]) helpdict={VerticesTypeSkeletonRootNode['id']: VerticesTypeSkeletonRootNode['name'], VerticesTypeSkeletonNode['id']: VerticesTypeSkeletonNode['name'], VerticesTypeConnectorNode['id']: VerticesTypeConnectorNode['name'] } arr=np.recarray( len(helpdict), dtype=my_dtype ) for i,kv in enumerate(helpdict.items()): arr[i][0] = kv[0] arr[i][1] = kv[1] verttype.attrs['value_name']=arr vert.create_dataset("confidence", data=data['vert']['confidence']) vert.create_dataset("userid", data=data['vert']['userid']) vert.create_dataset("radius", data=data['vert']['radius']) vert.create_dataset("skeletonid", data=data['vert']['skeletonid']) vert.create_dataset("creation_time", data=data['vert']['creation_time']) vert.create_dataset("modification_time", data=data['vert']['modification_time']) conn.create_dataset("id", data=data['conn']['id']) if data['conn'].has_key('type'): conntype=conn.create_dataset("type", data=data['conn']['type']) helpdict={ConnectivityNeurite['id']: ConnectivityNeurite['name'], ConnectivityPresynaptic['id']: ConnectivityPresynaptic['name'], ConnectivityPostsynaptic['id']: ConnectivityPostsynaptic['name'] } arr=np.recarray( len(helpdict), dtype=my_dtype ) for i,kv in enumerate(helpdict.items()): arr[i][0] = kv[0] arr[i][1] = kv[1] conntype.attrs['value_name']=arr if data['conn'].has_key('skeletonid'): conn.create_dataset("skeletonid", data=data['conn']['skeletonid']) if data.has_key('meta'): metadata=mcgroup.create_group('metadata') # create recarray with two columns, skeletonid and string my_dtype = np.dtype([('skeletonid', 'l'), ('name', h5py.new_vlen(str))]) arr=np.recarray( len(data['meta']), dtype=my_dtype ) for i,kv in enumerate(data['meta'].items()): arr[i][0] = kv[0] arr[i][1] = kv[1] metadata.create_dataset('skeleton_name', data=arr )
def __init__(self, stid, nlat, elon, elev):
    self.stid = stid
    self.nlat = nlat
    self.elon = elon
    self.elev = elev

    # Measured data
    self.datat = np.recarray((NPTSt,),
                             dtype={"names": ("flux", "sun_alt", "moon_phase"),
                                    "formats": (np.int64, np.float64, np.float64)})
    self.datap = np.recarray((NPTSp,),
                             dtype={"names": ("flux", "sun_alt", "moon_phase"),
                                    "formats": (np.int64, np.float64, np.float64)})
def make_polynomial_psf_params(ntrain, nvalidate, nvisualize): """ Make training/testing data for PSF with params varying as polynomials. """ bd = galsim.BaseDeviate(5772156649+314159) ud = galsim.UniformDeviate(bd) training_data = np.recarray((ntrain,), dtype=star_type) validate_data = np.recarray((nvalidate,), dtype=star_type) # Make randomish Chebyshev polynomial coefficients # 5 Different arrays (hlr, g1, g2, u0, v0), and up to 3rd order in each of x and y. coefs = np.empty((4, 4, 5), dtype=float) for (i, j, k), _ in np.ndenumerate(coefs): coefs[i, j, k] = 2*ud() - 1.0 for i in range(ntrain): u = ud() v = ud() flux = ud()*50+100 vals = np.polynomial.chebyshev.chebval2d(u, v, coefs)/6 # range is [-0.5, 0.5] hlr = vals[0] * 0.1 + 0.35 g1 = vals[1] * 0.1 g2 = vals[2] * 0.1 u0 = vals[3] v0 = vals[4] training_data[i] = (u, v, hlr, g1, g2, u0, v0, flux) for i in range(nvalidate): u = ud()*0.5 + 0.25 v = ud()*0.5 + 0.25 flux = 1.0 vals = np.polynomial.chebyshev.chebval2d(u, v, coefs)/6 # range is [-0.5, 0.5] hlr = vals[0] * 0.1 + 0.35 g1 = vals[1] * 0.1 g2 = vals[2] * 0.1 u0 = vals[3] v0 = vals[4] validate_data[i] = (u, v, hlr, g1, g2, u0, v0, flux) vis_data = np.recarray((nvisualize*nvisualize), dtype=star_type) u = v = np.linspace(0, 1, nvisualize) u, v = np.meshgrid(u, v) for i, (u1, v1) in enumerate(zip(u.ravel(), v.ravel())): vals = np.polynomial.chebyshev.chebval2d(u1, v1, coefs)/6 # range is [-0.5, 0.5] hlr = vals[0] * 0.1 + 0.35 g1 = vals[1] * 0.1 g2 = vals[2] * 0.1 u0 = vals[3] v0 = vals[4] vis_data[i] = (u1, v1, hlr, g1, g2, u0, v0, 1.0) return training_data, validate_data, vis_data.reshape((nvisualize, nvisualize))
def test_save_results(self): # test for 1d # test for 2d # test for 3d # test for very large nr_experiments = 10000 experiments = np.recarray((nr_experiments,), dtype=[('x', float), ('y', float)]) outcome_a = np.random.rand(nr_experiments,1) results = (experiments, {'a': outcome_a}) fn = u'../data/test.tar.gz' save_results(results, fn) os.remove(fn) # ema_logging.info('1d saved successfully') nr_experiments = 10000 nr_timesteps = 100 experiments = np.recarray((nr_experiments,), dtype=[('x', float), ('y', float)]) outcome_a = np.zeros((nr_experiments,nr_timesteps)) results = (experiments, {'a': outcome_a}) save_results(results, fn) os.remove(fn) # ema_logging.info('2d saved successfully') nr_experiments = 10000 nr_timesteps = 100 nr_replications = 10 experiments = np.recarray((nr_experiments,), dtype=[('x', float), ('y', float)]) outcome_a = np.zeros((nr_experiments,nr_timesteps,nr_replications)) results = (experiments, {'a': outcome_a}) save_results(results, fn) os.remove(fn) # ema_logging.info('3d saved successfully') nr_experiments = 500000 nr_timesteps = 100 experiments = np.recarray((nr_experiments,), dtype=[('x', float), ('y', float)]) outcome_a = np.zeros((nr_experiments,nr_timesteps)) results = (experiments, {'a': outcome_a}) save_results(results, fn) os.remove(fn)
def test_record_write_2d_array(self):
    """
    Test writing to a 2D array within a structured type
    """
    nbval = np.recarray(1, dtype=recordwith2darray)
    nbrecord = numpy_support.from_dtype(recordwith2darray)
    cfunc = self.get_cfunc(record_write_2d_array, (nbrecord,))
    cfunc(nbval[0])

    expected = np.recarray(1, dtype=recordwith2darray)
    expected[0].i = 3
    expected[0].j[:] = np.asarray([5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
                                  np.float32).reshape(3, 2)
    np.testing.assert_equal(expected, nbval)
def test_record_write_array(self):
    '''
    Testing writing to a 1D array within a structured type
    '''
    nbval = np.recarray(1, dtype=recordwitharray)
    nbrecord = numpy_support.from_dtype(recordwitharray)
    cfunc = self.get_cfunc(record_write_array, (nbrecord,))
    cfunc(nbval[0])

    expected = np.recarray(1, dtype=recordwitharray)
    expected[0].g = 2
    expected[0].h[0] = 3.0
    expected[0].h[1] = 4.0
    np.testing.assert_equal(expected, nbval)
def _load(self, maxentries=None): self._build_index(maxentries) incdict, cumdict = self._set_entries() if incdict is None and cumdict is None: return totim = [] for ts, sp, seekpoint in self.idx_map: tinc, tcum = self._get_sp(ts, sp, seekpoint) for entry in self.entries: incdict[entry].append(tinc[entry]) cumdict[entry].append(tcum[entry]) # Get the time for this record seekpoint = self._seek_to_string('TIME SUMMARY AT END') tslen, sptim, tt = self._get_totim(ts, sp, seekpoint) totim.append(tt) # get kstp and kper idx_array = np.array(self.idx_map) # build dtype for recarray dtype_tups = [('totim', np.float32), ("time_step", np.int32), ("stress_period", np.int32)] for entry in self.entries: dtype_tups.append((entry, np.float32)) dtype = np.dtype(dtype_tups) # create recarray nentries = len(incdict[entry]) self.inc = np.recarray(shape=(nentries,), dtype=dtype) self.cum = np.recarray(shape=(nentries,), dtype=dtype) # fill each column of the recarray for entry in self.entries: self.inc[entry] = incdict[entry] self.cum[entry] = cumdict[entry] # file the totim, time_step, and stress_period columns for the # incremental and cumulative recarrays (zero-based kstp,kper) self.inc['totim'] = np.array(totim)[:] self.inc["time_step"] = idx_array[:, 0] - 1 self.inc["stress_period"] = idx_array[:, 1] - 1 self.cum['totim'] = np.array(totim)[:] self.cum["time_step"] = idx_array[:, 0] - 1 self.cum["stress_period"] = idx_array[:, 1] - 1 return
def for_shape(self, shape):
    dtype = [(n, t) for n, t in self._stats_fields]
    data = np.recarray(shape, dtype=dtype)
    data[:] = 0
    stats = Statistics(data=data)
    stats._pointer = 0
    return stats
def __new__(cls, filename):
    """Create a new instance.

    Numpy array subclasses use this method instead of __init__ for initialization.
    """
    headerDict = cls._readHeader(filename)
    noindex = cls._load(filename, headerDict)

    index = np.recarray(shape=noindex.shape, dtype=cls.row)
    for el in cls._raw_row[2:]:
        key = el[0]
        index.__setattr__(key, noindex.__getattribute__(key))
        continue

    index.id = np.arange(len(noindex))
    try:
        index.position = cls._computePosition(index.grid, headerDict)
    except:
        index.position = np.nan
        pass

    obj = index.view(cls)
    # Set the attributes on the snapshot
    for headerField in headerDict:
        setattr(obj, headerField, headerDict[headerField])
        continue
    return obj
def storeStageData(stage_file, masked_image_file):
    ## read motor data from csv
    with open(stage_file) as fid:
        reader = csv.reader(fid)
        data = [line for line in reader]

    # the csv must have more than one line (the header); otherwise it is an empty file
    if len(data) <= 1:
        with tables.File(masked_image_file, 'r+') as fid:
            dtype = [('real_time', int), ('stage_time', int),
                     ('stage_x', float), ('stage_y', float)]
            fid.create_table('/', 'stage_data', obj=np.recarray(0, dtype))
        return

    # filter, check and store the data into a recarray
    header, data = _getHeader(data)
    csv_dict = _data2dict(header, data)
    stage_recarray = _dict2recarray(csv_dict)

    with tables.File(masked_image_file, 'r+') as mask_fid:
        if '/stage_data' in mask_fid:
            mask_fid.remove_node('/', 'stage_data')
        mask_fid.create_table('/', 'stage_data', obj=stage_recarray)

    return csv_dict
def as_recarray(self):
    """Convert into a numpy record array."""
    dtype = [(k, v.dtype) for k, v in self.__dict__.iteritems()]
    # all attribute arrays are assumed to have the same length
    nrows = len(next(iter(self.__dict__.values())))
    R = numpy.recarray(nrows, dtype=dtype)
    for key in self.__dict__:
        R[key] = self.__dict__[key]
    return R
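# Standalone sketch of the same attribute-dict -> recarray conversion that
# as_recarray performs above; the container and the field names ('x', 'flag')
# are made up for illustration, only the numpy calls are the point.
import numpy

columns = {"x": numpy.arange(5, dtype="f8"),
           "flag": numpy.zeros(5, dtype="?")}
dtype = [(name, arr.dtype) for name, arr in columns.items()]
rec = numpy.recarray(5, dtype=dtype)
for name, arr in columns.items():
    rec[name] = arr
print(rec.x)      # fields are accessible as attributes on a recarray
print(rec.flag)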
def _compute_asset_lifetimes(self):
    """
    Compute and cache a recarray of asset lifetimes.
    """
    equities_cols = self.equities.c
    buf = np.array(
        tuple(
            sa.select((
                equities_cols.sid,
                equities_cols.start_date,
                equities_cols.end_date,
            )).execute(),
        ),
        dtype='<f8',  # use doubles so we get NaNs
    )
    lifetimes = np.recarray(
        buf=buf,
        shape=(len(buf),),
        dtype=[
            ('sid', '<f8'),
            ('start', '<f8'),
            ('end', '<f8')
        ],
    )
    start = lifetimes.start
    end = lifetimes.end
    start[np.isnan(start)] = 0  # convert missing starts to 0
    end[np.isnan(end)] = np.iinfo(int).max  # convert missing ends to INTMAX
    # Cast the results back down to int.
    return lifetimes.astype([
        ('sid', '<i8'),
        ('start', '<i8'),
        ('end', '<i8'),
    ])
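# Minimal sketch (not from the original code base) of the buffer-wrapping
# pattern used in _compute_asset_lifetimes: a plain float64 array is viewed as
# a recarray via the buf= argument, NaNs are patched per column, and the
# result is cast down to integers. The sample values are made up.
import numpy as np

raw = np.array([[1.0, 10.0, np.nan],
                [2.0, np.nan, 99.0]], dtype='<f8')   # sid, start, end
view = np.recarray(buf=raw, shape=(len(raw),),
                   dtype=[('sid', '<f8'), ('start', '<f8'), ('end', '<f8')])
view.start[np.isnan(view.start)] = 0                     # missing start -> 0
view.end[np.isnan(view.end)] = np.iinfo(np.int64).max    # missing end -> INTMAX
lifetimes = view.astype([('sid', '<i8'), ('start', '<i8'), ('end', '<i8')])
print(lifetimes)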
def test_mlist(self): fid = open(self.example_file, 'rb') hdr = self.header_class.from_fileobj(fid) mlist = self.mlist_class(fid, hdr) fid.seek(0) fid.seek(512) dat=fid.read(128*32) dt = np.dtype([('matlist',np.int32)]) dt = dt.newbyteorder('>') mats = np.recarray(shape=(32,4), dtype=dt, buf=dat) fid.close() #tests assert_true(mats['matlist'][0,0] + mats['matlist'][0,3] == 31) assert_true(mlist.get_frame_order()[0][0] == 0) assert_true(mlist.get_frame_order()[0][1] == 16842758.0) # test badly ordered mlist badordermlist = mlist badordermlist._mlist = np.array([[ 1.68427540e+07, 3.00000000e+00, 1.20350000e+04, 1.00000000e+00], [ 1.68427530e+07, 1.20360000e+04, 2.40680000e+04, 1.00000000e+00], [ 1.68427550e+07, 2.40690000e+04, 3.61010000e+04, 1.00000000e+00], [ 1.68427560e+07, 3.61020000e+04, 4.81340000e+04, 1.00000000e+00], [ 1.68427570e+07, 4.81350000e+04, 6.01670000e+04, 1.00000000e+00], [ 1.68427580e+07, 6.01680000e+04, 7.22000000e+04, 1.00000000e+00]]) assert_true(badordermlist.get_frame_order()[0][0] == 1)
def _allocate_output(self, windows, shape):
    """
    Allocate an output array whose rows should be passed to `self.compute`.

    The resulting array must have a shape of ``shape``.

    If we have standard outputs (i.e. self.outputs is NotSpecified), the
    default is an empty ndarray whose dtype is ``self.dtype``.

    If we have an outputs tuple, the default is an empty recarray with
    ``self.outputs`` as field names. Each field will have dtype ``self.dtype``.

    This can be overridden to control the kind of array constructed
    (e.g. to produce a LabelArray instead of an ndarray).
    """
    missing_value = self.missing_value
    outputs = self.outputs
    if outputs is not NotSpecified:
        out = recarray(
            shape,
            formats=[self.dtype.str] * len(outputs),
            names=outputs,
        )
        out[:] = missing_value
    else:
        out = full(shape, missing_value, dtype=self.dtype)
    return out
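# Small illustration (with assumed values, not part of the original class) of
# the allocation strategy in _allocate_output: named outputs become recarray
# fields that all share one dtype and are pre-filled with a missing value.
import numpy as np

outputs = ('alpha', 'beta')          # hypothetical output names
dtype = np.dtype('float64')
missing_value = np.nan
shape = (3, 2)

out = np.recarray(shape, formats=[dtype.str] * len(outputs), names=outputs)
out[:] = missing_value               # every field starts out as the missing value
print(out.alpha)                     # a (3, 2) array of NaN
print(out.beta)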
def CfxCentreLineSnapshot(filename): """Factory function wrapping a CFX snapshot. Load the data with: >>> snap = CfxSnapshot(filename) Fields are constructed from the header line. """ (__raw_row, fieldUnits) = parseHeader(filename, AllData=True) __raw_row = [('id', int),] + __raw_row fieldUnits['id'] = 1 # ('position', float, (3,)), # ('strain_rate', float), # ('speed', float), # ('velocity', float, (3,)), # ('wall_shear', float, (4,))] __readable_row = np.dtype(__raw_row[1:]) row = np.dtype(__raw_row) noindex = np.genfromtxt(filename, skip_header=findStart(filename, AllData=True)+2, delimiter=',', dtype=__readable_row).view(np.recarray) index = np.recarray(shape=noindex.shape, dtype=row) index.id = np.arange(len(noindex)) for el in __raw_row[1:]: key = el[0] index.__setattr__(key, U.convert(noindex.__getattribute__(key), fieldUnits[key], hlbUnits[key])) continue return index
def tree_to_recarray_py(trees, branches=None, use_cache=False, cache_size=1000000, include_weight=False, weight_name='weight', weight_dtype='f4'): """ Convert a tree or a list of trees into a numpy.recarray with fields corresponding to the tree branches (the slow pure-Python way...) """ if not isinstance(trees, (list, tuple)): trees = [trees] trees = [asrootpy(tree) for tree in trees] # if branches is None then select only branches with basic types # i.e. no vectors or other special objects tree = trees[0] _branches = {} if branches is None: branches = [] for name, value in tree.buffer.items(): if isinstance(value, Variable): _branches[name] = value branches.append(name) else: if len(set(branches)) != len(branches): raise ValueError("branches contains duplicates") for branch in branches: if branch not in tree.buffer: raise ValueError("Branch %s does not exist in tree" % branch) value = tree.buffer[branch] if not isinstance(value, Variable): raise TypeError("Branch %s is not a basic type: %s" % (branch, type(value))) _branches[branch] = value if not _branches: return None dtype = [(name, convert('ROOTCODE', 'NUMPY', _branches[name].type)) for name in branches] if include_weight: if weight_name not in _branches: dtype.append((weight_name, weight_dtype)) else: raise ValueError("Weight name '%s' conflicts " "with another field name" % weight_name) total_entries = sum([tree.GetEntries() for tree in trees]) array = np.recarray(shape=(total_entries,), dtype=dtype) i = 0 for tree in trees: tree.use_cache(use_cache, cache_size=cache_size, learn_entries=1) if use_cache: tree.always_read(branches) tree_weight = tree.GetWeight() for entry in tree: for j, branch in enumerate(branches): array[i][j] = entry[branch].value if include_weight: array[i][-1] = tree_weight i += 1 return array
def test_get_data_transpose(self):
    data_to_add = np.recarray((2,), dtype=[("f0", "<f8"), ("f1", "<f8"), ("f2", "<f8")])
    data_to_add[0] = (1, 2, 3)
    data_to_add[1] = (4, 5, 6)
    self.data.addData(data_to_add)
    self.assertRaises(RuntimeError, self.data.getData, None, 0, True, None)
def markers(self): """ Return the list of markers in struct-array form. What this produces should be interpreted by hdf5storage as a struct array. We represent the list of n items as a dict where each key is a list of n items. """ ret_list = self._instances[:] + self._markers[:] for name, start_times in self._marker_name_to_start.items(): for start_time in start_times: ret_list.append({ 'type': 'Marker', 'name': name, 'times': [start_time], }) def time_ordering(item): # pylint:disable=missing-docstring if item['times'][0] < 0: return item['times'][-1] else: return item['times'][0] ret_sorted = sorted(ret_list, key=time_ordering) ret_rec = np.recarray((len(ret_sorted),), dtype=[('type', 'O', (1,1)), ('name', 'O', (1,1)), ('times', 'O')]) for i, x in enumerate(ret_sorted): name = np.asarray(x['name'], dtype=np.string_) ret_rec[i]['name'][0] = np.asarray(x['name'], dtype=np.string_) ret_rec[i]['times'] = np.asarray(x['times']) ret_rec[i]['type'][0] = np.asarray(x['type'], dtype=np.string_) return ret_rec
def test_multiple_args_records(self): pyfunc = foobar mystruct_dt = np.dtype([('p', np.float64), ('row', np.float64), ('col', np.float64)]) mystruct = numpy_support.from_dtype(mystruct_dt) cres = compile_isolated(pyfunc, [mystruct[:], types.uint64, types.uint64], return_type=mystruct[:]) cfunc = cres.entry_point st1 = np.recarray(3, dtype=mystruct_dt) st1.p = np.arange(st1.size) + 1 st1.row = np.arange(st1.size) + 1 st1.col = np.arange(st1.size) + 1 old_refcnt_st1 = sys.getrefcount(st1) test_fail_args = ((st1, -1, 1), (st1, 1, -1)) # TypeError is for 2.6 exc_type = OverflowError if sys.version_info >= (2, 7) else TypeError for a, b, c in test_fail_args: with self.assertRaises(exc_type): cfunc(a, b, c) del test_fail_args, a, b, c gc.collect() self.assertEqual(sys.getrefcount(st1), old_refcnt_st1)
def save_records(hfile, where, data): """Save record array-like data to HDF5. Parameters ---------- hfile: h5py.File Opened HDF5 file object. where: str Dataset name. data: Union[pd.DataFrame, np.array] The data to write. Notes ----- When saving a DataFrame, the index information will be lost. """ original_type = str(type(data)) if isinstance(data, pd.DataFrame): data = data.to_records(index=False) if not isinstance(data, np.recarray): data = np.rec.array(data) dtype = [] utf8_encoded = set() json_encoded = set() for name in data.dtype.names: this_dtype = data[name].dtype if this_dtype.itemsize == 0: this_dtype = np.dtype('|{}1'.format(this_dtype.char)) if this_dtype == object or this_dtype.char == "U": dtype.append((name, "|S{}".format(maxlen(data[name])))) utf8_encoded.add(name) else: dtype.append((name, this_dtype)) sanitized = np.recarray(data.shape, dtype=dtype) for i, (name, _) in enumerate(dtype): if name in utf8_encoded: try: sanitized[name] = vencode(data[name]) except TypeError: # try dumping with JSON (for list/dict types) json_data = [json.dumps(col).encode() for col in data[name]] # We have to change the dtype which requires copying the array. # Maybe there is a better way to detect if something is JSON- # encodable earlier on? dtype[i] = (name, "|S{}".format(maxlen(json_data))) sanitized = sanitized.astype(dtype) sanitized[name] = json_data utf8_encoded.remove(name) json_encoded.add(name) else: sanitized[name] = data[name] hfile[where] = sanitized hfile[where].attrs["tabular"] = True hfile[where].attrs["utf8_encoded_fields"] = json.dumps(list(utf8_encoded)) hfile[where].attrs["json_encoded_fields"] = json.dumps(list(json_encoded)) hfile[where].attrs["original_type"] = original_type
def ohmi_envelope(ring, refpts=None, orbit=None, keep_lattice=False): """ Calculate the equilibrium beam envelope in a circular accelerator using Ohmi's beam envelope formalism [1] emit0, beamdata, emit = ohmi_envelope(ring[, refpts]) PARAMETERS ring Lattice object. refpts=None elements at which data is returned. It can be: 1) an integer in the range [-len(ring), len(ring)-1] selecting the element according to python indexing rules. As a special case, len(ring) is allowed and refers to the end of the last element, 2) an ordered list of such integers without duplicates, 3) a numpy array of booleans of maximum length len(ring)+1, where selected elements are True. KEYWORDS orbit=None Avoids looking for the closed orbit if it is already known ((6,) array) keep_lattice=False Assume no lattice change since the previous tracking OUTPUT emit0 emittance data at the start/end of the ring beamdata beam parameters at the start of the ring emit emittance data at the points refered to by refpts, if refpts is None an empty structure is returned. emit is a record array with fields: r66 (6, 6) equilibrium envelope matrix R r44 (4, 4) betatron emittance matrix (dpp = 0) m66 (6, 6) transfer matrix from the start of the ring orbit6 (6,) closed orbit emitXY (2,) betatron emittance projected on xxp and yyp emitXYZ (3,) 6x6 emittance projected on xxp, yyp, ldp beamdata is a record array with fields: tunes tunes of the 3 normal modes damping_rates damping rates of the 3 normal modes mode_matrices R-matrices of the 3 normal modes mode_emittances equilibrium emittances of the 3 normal modes Field values can be obtained with either emit['r66'] or emit.r66 REFERENCES [1] K.Ohmi et al. Phys.Rev.E. Vol.49. (1994) """ def process(r66): # projections on xx', zz', ldp emit3sq = numpy.array([det(r66[s, s]) for s in _submat]) # Prevent from unrealistic negative values of the determinant emit3 = numpy.sqrt(numpy.maximum(emit3sq, 0.0)) # Emittance cut for dpp=0 if emit3[0] < 1.E-13: # No equilibrium emittance r44 = numpy.nan * numpy.ones((4, 4)) elif emit3[1] < 1.E-13: # Uncoupled machine minv = inv(r66[[0, 1, 4, 5], :][:, [0, 1, 4, 5]]) r44 = numpy.zeros((4, 4)) r44[:2, :2] = inv(minv[:2, :2]) else: # Coupled machine minv = inv(r66) r44 = inv(minv[:4, :4]) # betatron emittances (dpp=0) emit2sq = numpy.array( [det(r44[s, s], check_finite=False) for s in _submat[:2]]) # Prevent from unrealistic negative values of the determinant emit2 = numpy.sqrt(numpy.maximum(emit2sq, 0.0)) return r44, emit2, emit3 def propag(m, cumb, orbit6): """Propagate the beam matrix to refpts""" sigmatrix = m.dot(rr).dot(m.T) + cumb m44, emit2, emit3 = process(sigmatrix) return sigmatrix, m44, m, orbit6, emit2, emit3 nelems = len(ring) uint32refs = uint32_refpts(refpts, nelems) bbcum, orbs = _dmatr(ring, orbit=orbit, keep_lattice=keep_lattice) mring, ms = find_m66(ring, uint32refs, orbit=orbs[0], keep_lattice=True) # ------------------------------------------------------------------------ # Equation for the moment matrix R is # R = MRING*R*MRING' + BCUM; # We rewrite it in the form of Lyapunov-Sylvester equation to use scipy's # solve_sylvester function # A*R + R*B = Q # where # A = inv(MRING) # B = -MRING' # Q = inv(MRING)*BCUM # ------------------------------------------------------------------------ aa = inv(mring) bb = -mring.T qq = numpy.dot(aa, bbcum[-1]) rr = solve_sylvester(aa, bb, qq) rr = 0.5 * (rr + rr.T) rr4, emitxy, emitxyz = process(rr) r66data = get_tunes_damp(mring, rr) data0 = numpy.rec.fromarrays((rr, rr4, mring, orbs[0], emitxy, 
emitxyz), dtype=ENVELOPE_DTYPE) if uint32refs.shape == (0, ): data = numpy.recarray((0, ), dtype=ENVELOPE_DTYPE) else: data = numpy.rec.fromrecords(list( map(propag, ms, bbcum[uint32refs], orbs[uint32refs, :])), dtype=ENVELOPE_DTYPE) return data0, r66data, data
def test(*, l=False): """Test dnb.reduce_precision and hdf5plugin.Bitshuffle.""" from math import sqrt from time import perf_counter # Parameters. nchan = 16 # Number of channels correlated nsamples = 100 # Number of samples integrated, delta_f*delta_t Tsys = 50 # System temperature f = 0.01 # Precision reduction parameter nfreq = 5 # Added dimensionality, spectral frequencies. ntime = 1000 # Added dimensionality, temporal integrations. # Made up channel dependant gain. gain_chan = numpy.arange(nchan) + nchan # Made up frequency dependant gain. bandpass = (numpy.arange(nfreq) + nfreq)**2 # Generate mock data. Model is pure uncorrelated receiver noise. # Auto correlations are a number, everything else is noise. nprod = (nchan * (nchan + 1)) // 2 vis = numpy.recarray((nfreq, nprod, ntime), DTYPE) chan_a = numpy.empty(nprod, numpy.int64) chan_b = numpy.empty(nprod, numpy.int64) for ff in range(nfreq): kk = 0 for ii in range(nchan): for jj in range(ii, nchan): chan_a[kk] = ii chan_b[kk] = jj amp = Tsys * gain_chan[ii] * gain_chan[jj] * bandpass[ff] if (ii == jj): vis[ff, kk].r = numpy.round( amp * abs(1.0 + numpy.random.randn(ntime) / sqrt(nsamples))) vis[ff, kk].i = 0.0 else: vis[ff, kk].r = numpy.round( amp * numpy.random.randn(ntime) / sqrt(2 * nsamples)) vis[ff, kk].i = numpy.round( amp * numpy.random.randn(ntime) / sqrt(2 * nsamples)) kk += 1 # Reduce precision. t0 = perf_counter() vis_rounded = reduce_precision(vis, nchan, chan_a, chan_b, f / nsamples) t = perf_counter() - t0 rate = nfreq * nprod * ntime * DTYPE.itemsize / t print("Throughput(reduce_precision): %f MiB/s" % (rate / 1024**2)) # Compress. with h5py.File('test_int32.h5', 'w') as f: t0 = perf_counter() f.create_dataset('mock_data', data=vis_rounded, **hdf5plugin.Bitshuffle()) t = perf_counter() - t0 rate = nfreq * nprod * ntime * DTYPE.itemsize / t print("Throughput(bitshuffle_compress): %f MiB/s" % (rate / 1024**2)) # Decompress. with h5py.File('test_int32.h5', 'r') as f: t0 = perf_counter() vis_decompressed = f['mock_data'][...] t = perf_counter() - t0 rate = nfreq * nprod * ntime * DTYPE.itemsize / t print("Throughput(bitshuffle_decompress): %f MiB/s" % (rate / 1024**2)) if numpy.any(vis_rounded != vis_decompressed): raise ValueError('Data changed after I/O.') # Calculate compression rate. import os rate = os.path.getsize('test_int32.h5') / (nfreq * nprod * ntime * DTYPE.itemsize) print('Compression rate: %f %%' % (100 * rate)) rounding_error = (vis_rounded.r - vis.r).astype(numpy.int64) if l: print("Rounding bias:") print(numpy.mean(rounding_error, -1)) print("Rounding RMS:") print(numpy.sqrt(numpy.mean(rounding_error**2, -1))) print("Relative to thermal noise:") print(numpy.mean(rounding_error**2, -1) / numpy.var(vis.r, -1))
t = np.arange(200) dt = t[1] - t[0] if type == 'B': x1 = 1 x2 = 0 elif type == '': x1 = 0 x2 = 1 rec = np.recarray(1, dtype=[ ('Q', 'f8', len(pmts)), ('T', 'f8', len(pmts)), ('St', 'f8', len(pmts)), ('mu', 'f8', 1), ('N', 'f8', 1), ('F', 'f8', 1), ('Tf', 'f8', 1), ('Ts', 'f8', 1), ('R', 'f8', 1), ('a', 'f8', 1), ('eta', 'f8', 1), ]) Rec = np.recarray(5000, dtype=[ ('Q', 'f8', len(pmts)), ('T', 'f8', len(pmts)), ('St', 'f8', len(pmts)), ('mu', 'f8', 1), ('N', 'f8', 1), ('F', 'f8', 1),
def calc_LUT(self, use_common=True): """Calculate the Look-up table :return: look up table either in CSR or LUT format depending on serl.method """ if self.pos is None: self.calc_pos() if self.max_size is None and not use_common: self.calc_size() if self.lut is None: with self._sem: if self.lut is None: mask = self.mask if _distortion: if use_common: self.lut = _distortion.calc_sparse( self.pos, self._shape_out, max_pixel_size=(self.delta1, self.delta2), format=self.method) else: if self.method == "lut": self.lut = _distortion.calc_LUT( self.pos, self._shape_out, self.bin_size, max_pixel_size=(self.delta1, self.delta2)) else: self.lut = _distortion.calc_CSR( self.pos, self._shape_out, self.bin_size, max_pixel_size=(self.delta1, self.delta2)) else: lut = numpy.recarray(shape=(self._shape_out[0], self._shape_out[1], self.max_size), dtype=[("idx", numpy.uint32), ("coef", numpy.float32)]) lut[:, :, :].idx = 0 lut[:, :, :].coef = 0.0 outMax = numpy.zeros(self._shape_out, dtype=numpy.uint32) idx = 0 buffer_ = numpy.empty((self.delta1, self.delta2)) quad = Quad(buffer_) for i in range(self._shape_out[0]): for j in range(self._shape_out[1]): if (mask is not None) and mask[i, j]: continue # i,j, idx are indexes of the raw image uncorrected quad.reinit(*list(self.pos[i, j, :, :].ravel())) # print(self.pos[i, j, 0, :], self.pos[i, j, 1, :], self.pos[i, j, 2, :], self.pos[i, j, 3, :] try: quad.populate_box() except Exception as error: print( "error in quad.populate_box of pixel %i, %i: %s" % (i, j, error)) print("calc_area_vectorial", quad.calc_area_vectorial()) print(self.pos[i, j, 0, :], self.pos[i, j, 1, :], self.pos[i, j, 2, :], self.pos[i, j, 3, :]) print(quad) raise # box = quad.get_box() for ms in range(quad.get_box_size0()): ml = ms + quad.get_offset0() if ml < 0 or ml >= self._shape_out[0]: continue for ns in range(quad.get_box_size1()): # ms,ns are indexes of the corrected image in short form, ml & nl are the same nl = ns + quad.get_offset1() if nl < 0 or nl >= self._shape_out[1]: continue val = quad.get_box(ms, ns) if val <= 0: continue k = outMax[ml, nl] lut[ml, nl, k].idx = idx lut[ml, nl, k].coef = val outMax[ml, nl] = k + 1 idx += 1 lut.shape = (self._shape_out[0] * self._shape_out[1]), self.max_size self.lut = lut return self.lut
import os
import sys

import numpy as np
from scipy.stats import poisson, binom
from scipy.special import erf as erf

from minimize import minimize
import multiprocessing

pmts = [0, 1, 4, 7, 8, 14]

Rec = np.recarray(1, dtype=[
    ('Q', 'f8', len(pmts)),
    ('T', 'f8', len(pmts)),
    ('St', 'f8', len(pmts)),
    ('Sa', 'f8', len(pmts)),
    ('mu', 'f8', 1),
    ('W', 'f8', 1),
    ('F', 'f8', 1),
    ('Tf', 'f8', 1),
    ('Ts', 'f8', 1),
    ('R', 'f8', 1),
    ('a', 'f8', 1),
])

Rec[0] = ([0.28609523, 0.21198892, 0.1661045, 0.23595573, 0.2543458, 0.46767996],
          [42.43727439, 42.48680044, 42.48223214, 42.61715417, 42.97131299, 42.35603571],
          [1.14722701, 0.82496347, 0.71858647, 1.61434698, 1.48554624, 1.03053529],
          [1.14722701, 0.82496347, 0.71858647, 1.61434698, 1.48554624, 1.03053529],
          2.57341188, 13.7, 0.11035399, 0.94339727, 34.3602973, 0.5760872, 0.36124252)
def burstensemble( base, x_0, z, r1, r2, r3, mass, radius, bstart, pflux, numburstsobs, ): minmdot = 0.0 maxmdot = 1.0 mdot_res = 1e-6 sbt = bstart salpha = [] stime = [] smdot = [] se_b = [] for i in range(0, numburstsobs): mdot = (0.67 / 8.8) * pflux[i] * r1 tmp = settle(base, z, x_0, mdot, 1.0, mass, radius) mdot_hist = [mdot] while abs(mdot - mdot_hist[len(mdot_hist) - 1]) > mdot_res / 2.0 and ( mdot > minmdot and mdot < maxmdot): mdot_hist.append(mdot) res = np.recarray((1, ), dtype=[("tdel", np.float64), ("e_b", np.float64), ("alpha", np.float64), ("mdot", np.float64)]) # assign elements res.tdel = tmp.tdel / 24.0 res.e_b = tmp.E_b * 0.8 # multiply eb by 0.8 to account for incomlpete burning of fuel, as in Goodwin et al (2018). alpha = tmp.alpha alpha = alpha[0] res.mdot = mdot _e_b = res.e_b _e_b = _e_b[0] se_b.append(_e_b) _mdot = res.mdot _mdot = _mdot[0] salpha.append(alpha) smdot.append(_mdot) stime.append(bstart[i]) mdot_max = max(smdot) result = dict() result["base"] = [base] result["z"] = [z] result["x_0"] = [x_0] result["r1"] = [r1] result["r2"] = [r2] result["r3"] = [r3] result["mdot"] = smdot result["mdot_max"] = [mdot_max] result["time"] = stime result["alpha"] = salpha result["e_b"] = se_b result["mass"] = [mass] result["radius"] = [radius] print('ensemble') print(f"In burstrain fluence is {se_b}") return result
def next_burst(base, z, x_0, t1, tobs, a, b, r1, cfac, mass, radius, direction=1, debug=False): """ Routine to find the next burst in the series and return its properties Adapted from sim_burst.pro """ mdot_res = 1e-6 fn = "next_burst" assert direction in (1, -1) minmdot = 0.0 maxmdot = 1.0 # a, b passed as an array of an array a = a[0] b = b[0] # Determine the initial guess for mean mdot (linear) # i0=min([n_elements(a)-1,max(where(t1 gt tobs))]) itobs = np.where(t1 > tobs)[0] if (len(itobs) == 0) & (direction == -1): # the start time is before *any* of the observations; don't bother! return None if len(itobs) == 0: # this makes no sense to me; if the t1 value is < all the tobs values, then the # nearest element would be the zeroth # itobs = [-1] itobs = [0] # i0=max([0,min([len(a)-1,max([i for i, value in enumerate(tobs) if value < t1])])]) i0 = max([0, min([len(a) - 1, max(itobs)])]) mdot0 = ((0.67 / 8.8) * (a[i0] + b[i0] * t1) * r1) if debug: print("{}: z={}, X_0={}, r1={}".format(fn, z, x_0, r1)) # Calculate the burst properties for the trial mdot value trial = settle(base, z, x_0, mdot0, cfac, mass, radius) if debug: print( '{}: initial guess mdot0={} @ t1={}, tdel={}, direction={}'.format( fn, mdot0, t1, trial.tdel, direction)) # Now update the mdot with the value averaged over the trial interval if direction == 1: mdot = (0.67 / 8.8) * mean_flux(t1, t1 + trial.tdel / 24.0, tobs, a, b) * r1 else: mdot = (0.67 / 8.8) * mean_flux(t1 - trial.tdel / 24.0, t1, tobs, a, b) * r1 # Now retain the entire history of this iteration, so we can check for loops mdot_hist = [mdot0] tdel_hist = [trial.tdel[0] / 24.] nreturn = 0 while (abs(mdot - mdot_hist[-1]) > mdot_res / 2.0) \ and (((t1 + trial.tdel / 24.0 < 2.*max(tobs)) & (direction == 1)) \ or ((t1 - trial.tdel / 24.0 > min(tobs)-(max(tobs)-min(tobs))) & (direction == -1))) \ and (mdot > minmdot and mdot < maxmdot): trial = settle(base, z, x_0, mdot[0], cfac, mass, radius) nreturn = nreturn + 1 mdot_hist.append(mdot[0]) tdel_hist.append(trial.tdel[0] / 24.) if direction == 1: mdot = (0.67 / 8.8) * mean_flux(t1, t1 + (trial.tdel / 24.0), tobs, a, b) * r1 else: mdot = (0.67 / 8.8) * mean_flux(t1 - (trial.tdel / 24.0), t1, tobs, a, b) * r1 # Break out of the loop here, if necessary if nreturn > 10: e = random.random() mdot = mdot_hist[-1] * (1.0 - e) + mdot * e # Perhaps you should try to reset this randomly every 10 steps? - dkg # Yes, otherwise every trial above 10 steps will be random nreturn = 0 # save the final versions to the history arrays mdot_hist.append(mdot[0]) tdel_hist.append(trial.tdel[0] / 24.) if debug: print('{}: mdot_hist={}'.format(fn, mdot_hist)) # now produce a diagnostic plot with the debug flag # plt.plot(t1+np.array(tdel_hist), mdot_hist, '.', label='tdel history') for tdel in tdel_hist: plt.axvline(t1 + tdel, color='k', ls='--') # also calculate a bunch of values to compare with t_arr = np.arange(t1, max(tobs), step=0.1) m_arr = [0] t_arr2 = [t1] for t in t_arr[1:]: _mdot = (0.67 / 8.8) * mean_flux(t1, t, tobs, a, b) * r1 _tmp = settle(base, z, x_0, _mdot, cfac, mass, radius) t_arr2.append(t1 + _tmp.tdel[0] / 24.) 
m_arr.append(_mdot) plt.plot(t_arr, np.array(t_arr2), '-', label='tdel') plt.plot(t_arr, t_arr, '-', label='1:1') plt.xlim((0, 1.1 * max(t1 + np.array(tdel_hist)))) plt.ylim((0, 1.1 * max(t1 + np.array(tdel_hist)))) # plt.plot(np.array(t_arr2), np.array(m_arr), '.') plt.legend() plt.show() breakpoint() # if mdot < minmdot or mdot > maxmdot: if abs(mdot - mdot_hist[-2]) > mdot_res / 2.0: return None # create array #print(f'{fn}: mdot={mdot}, tdel={trial.tdel}') result = np.recarray((1, ), dtype=[("t2", np.float64), ("e_b", np.float64), ("alpha", np.float64)]) # assign elements result.t2 = t1 + direction * trial.tdel / 24.0 result.e_b = trial.E_b # multiply eb by 0.8 to account for incomlpete burning of fuel, as in Goodwin et al (2018). result.alpha = trial.alpha # result.qnuc = tmp.Q_nuc # result.xbar = tmp.xbar result.mdot = mdot return result
def keypoints(self, image): """ Calculates the keypoints of the image :param image: ndimage of 2D (or 3D if RGB) """ self.reset_timer() with self._sem: total_size = 0 keypoints = [] descriptors = [] assert image.shape[:2] == self.shape assert image.dtype == self.dtype t0 = time.time() if self.dtype == numpy.float32: if type(image) == pyopencl.array.Array: evt = pyopencl.enqueue_copy(self.queue, self.buffers[0].data, image.data) else: evt = pyopencl.enqueue_copy(self.queue, self.buffers[0].data, image) if self.profile: self.events.append(("copy H->D", evt)) elif (len(image.shape) == 3) and (self.dtype == numpy.uint8) and (self.RGB): if type(image) == pyopencl.array.Array: evt = pyopencl.enqueue_copy(self.queue, self.buffers["raw"].data, image.data) else: evt = pyopencl.enqueue_copy(self.queue, self.buffers["raw"].data, image) if self.profile: self.events.append(("copy H->D", evt)) # print self.procsize[0], self.wgsize[0] evt = self.programs["preprocess"].rgb_to_float( self.queue, self.procsize[0], self.wgsize[0], self.buffers["raw"].data, self.buffers[0].data, *self.scales[0]) if self.profile: self.events.append(("RGB -> float", evt)) elif self.dtype in self.converter: program = self.programs["preprocess"].__getattr__( self.converter[self.dtype]) evt = pyopencl.enqueue_copy(self.queue, self.buffers["raw"].data, image) if self.profile: self.events.append(("copy H->D", evt)) evt = program(self.queue, self.procsize[0], self.wgsize[0], self.buffers["raw"].data, self.buffers[0].data, *self.scales[0]) if self.profile: self.events.append(("convert -> float", evt)) else: raise RuntimeError("invalid input format error") k1 = self.programs["reductions"].max_min_global_stage1( self.queue, (self.red_size * self.red_size, ), (self.red_size, ), self.buffers[0].data, self.buffers["max_min"].data, numpy.uint32(self.shape[0] * self.shape[1])) k2 = self.programs["reductions"].max_min_global_stage2( self.queue, (self.red_size, ), (self.red_size, ), self.buffers["max_min"].data, self.buffers["max"].data, self.buffers["min"].data) if self.profile: self.events.append(("max_min_stage1", k1)) self.events.append(("max_min_stage2", k2)) evt = self.programs["preprocess"].normalizes( self.queue, self.procsize[0], self.wgsize[0], self.buffers[0].data, self.buffers["min"].data, self.buffers["max"].data, self.buffers["255"].data, *self.scales[0]) if self.profile: self.events.append(("normalize", evt)) # octSize = 1.0 curSigma = 1.0 if par.DoubleImSize else 0.5 octave = 0 if self._initSigma > curSigma: logger.debug("Bluring image to achieve std: %f", self._initSigma) sigma = math.sqrt(self._initSigma**2 - curSigma**2) self._gaussian_convolution(self.buffers[0], self.buffers[0], sigma, 0) # else: # pyopencl.enqueue_copy(self.queue, dest=self.buffers[(0, "G_1")].data, src=self.buffers["input"].data) for octave in range(self.octave_max): kp, descriptor = self._one_octave(octave) logger.info("in octave %i found %i kp" % (octave, kp.shape[0])) if kp.shape[0] > 0: keypoints.append(kp) descriptors.append(descriptor) total_size += kp.shape[0] ######################################################################## # Merge keypoints in central memory ######################################################################## output = numpy.recarray(shape=(total_size, ), dtype=self.dtype_kp) last = 0 for ds, desc in zip(keypoints, descriptors): l = ds.shape[0] if l > 0: output[last:last + l].x = ds[:, 0] output[last:last + l].y = ds[:, 1] output[last:last + l].scale = ds[:, 2] output[last:last + l].angle = ds[:, 3] output[last:last + 
l].desc = desc last += l logger.info("Execution time: %.3fms" % (1000 * (time.time() - t0))) # self.count_kp(output) return output
def match(self, nkp1, nkp2, raw_results=False): """Calculate the matching of 2 keypoint list :param nkp1: numpy 1D recarray of keypoints or equivalent GPU buffer :param nkp2: numpy 1D recarray of keypoints or equivalent GPU buffer :param raw_results: if true return the 2D array of indexes of matching keypoints (not the actual keypoints) TODO: implement the ROI ... """ assert len( nkp1.shape) == 1 # Nota: nkp1.ndim is not valid for gpu_arrays assert len(nkp2.shape) == 1 valid_types = (numpy.ndarray, numpy.core.records.recarray, pyopencl.array.Array) assert isinstance(nkp1, valid_types) assert isinstance(nkp2, valid_types) result = None with self.sem: if isinstance(nkp1, pyopencl.array.Array): kpt1_gpu = nkp1 else: if nkp1.size > self.cl_mem["Kp_1"].size: logger.warning( "increasing size of keypoint vector 1 to %i" % nkp1.size) self.cl_mem["Kp_1"] = pyopencl.array.empty( self.queue, (nkp1.size, ), dtype=self.dtype_kp) kpt1_gpu = self.cl_mem["Kp_1"] self._reset_buffer1() evt1 = pyopencl.enqueue_copy(self.queue, kpt1_gpu.data, nkp1) if self.profile: self.events.append(("copy H->D KP_1", evt1)) if isinstance(nkp2, pyopencl.array.Array): kpt2_gpu = nkp2 else: if nkp2.size > self.cl_mem["Kp_2"].size: logger.warning( "increasing size of keypoint vector 2 to %i" % nkp2.size) self.cl_mem["Kp_2"] = pyopencl.array.empty( self.queue, (nkp2.size, ), dtype=self.dtype_kp) kpt2_gpu = self.cl_mem["Kp_2"] self._reset_buffer2() evt2 = pyopencl.enqueue_copy(self.queue, kpt2_gpu.data, nkp2) if self.profile: self.events.append(("copy H->D KP_2", evt2)) if min(kpt1_gpu.size, kpt2_gpu.size) > self.cl_mem["match"].shape[0]: self.kpsize = min(kpt1_gpu.size, kpt2_gpu.size) self.cl_mem["match"] = pyopencl.array.empty(self.queue, (self.kpsize, 2), dtype=numpy.int32) self._reset_output() wg = self.kernel_size["matching"] size = calc_size((nkp1.size, ), (wg, )) evt = self.kernels.matching( self.queue, size, (wg, ), kpt1_gpu.data, kpt2_gpu.data, self.cl_mem["match"].data, self.cl_mem["cnt"].data, numpy.int32(self.kpsize), numpy.float32(par.MatchRatio * par.MatchRatio), numpy.int32(nkp1.size), numpy.int32(nkp2.size)) if self.profile: self.events.append(("matching", evt)) size = self.cl_mem["cnt"].get()[0] match = numpy.empty(shape=(size, 2), dtype=numpy.int32) if size > 0: cpyD2H = pyopencl.enqueue_copy(self.queue, match, self.cl_mem["match"].data) if self.profile: self.events.append(("copy D->H match", cpyD2H)) if raw_results: result = match else: result = numpy.recarray(shape=(size, 2), dtype=self.dtype_kp) result[:, 0] = nkp1[match[:size, 0]] result[:, 1] = nkp2[match[:size, 1]] return result
def main(): ###make sure to change these when running in a new enviorment!### #location of data directory filepath1 = cu.get_output_path() + 'processed_data/NYU_VAGC/' savepath1 = filepath1 + 'custom_catalogues/' filepath2 = cu.get_output_path() + 'processed_data/mpa_dr7/' savepath2 = filepath2 + 'custom_catalogues/' filepath3 = cu.get_output_path() + 'processed_data/berlind_groupcat/' savepath3 = filepath3 + 'custom_catalogues/' ################################################################# cosmo = FlatLambdaCDM(H0=100, Om0=0.3169) #h=1, Omega_m=0.3, Omega_Lambda=0.7 catalogue1 = 'nyu_vagc_dr7' catalogue2 = 'gal_info_gal_totspecsfr_dr7_v5_2' try: sys.argv[1] except: mass = '10.2' else: mass = sys.argv[1] catalogue3 = 'smthresh' + mass + '.groups' print 'reading in', catalogue3, 'catalogue...' catalogue3_new = 'sample3_M_model.sm' + mass print 'making', catalogue3_new, 'catalogue...' #open nyu vagc print catalogue1 f1 = h5py.File(filepath1 + catalogue1 + '.hdf5', 'r') dset1 = f1.get(catalogue1) match13 = np.load(filepath1 + 'berlind_groupcat_match/' + catalogue3 + '_' + catalogue1 + '_match.npy') #open mpa print catalogue2 f2 = h5py.File(filepath2 + catalogue2 + '.hdf5', 'r') dset2 = f2.get(catalogue2) match23 = np.load(filepath2 + 'berlind_groupcat_match/' + catalogue3 + '_' + catalogue2 + '_match.npy') #open groupcat print catalogue3 f3 = h5py.File(filepath3 + catalogue3 + '.hdf5', 'r') dset3 = f3.get(catalogue3) match31 = np.load(filepath3 + 'nyu_vagc_match/' + catalogue1 + '_' + catalogue3 + '_match.npy') match32 = np.load(filepath3 + 'mpa_dr7_match/' + catalogue2 + '_' + catalogue3 + '_match.npy') #if you want to know the colum names of the data sets... #print dset1.dtype.descr #print ' ' #print dset2.dtype.descr #print ' ' #print dset3.dtype.descr #here is the data model for the new group catalogue dtype=[('ID','>i8'),('RA','>f8'),('DEC','>f8'),\ ('Z','>f8'),('Z_ERR','>f8'),('Z_TYPE','>i8'),('VELDISP','>f8'),('VELDISP_ERR','>f8'),('FIBERCOL','>i8'),\ ('M_u,0.1','>f8'),('M_g,0.1','>f8'),('M_r,0.1','>f8'),('M_i,0.1','>f8'),('M_z,0.1','>f8'),\ ('N_SERSIC','>f8'),\ ('MSTAR','>f8'),('SSFR','>f8'),\ ('GROUP_ID','>i8'),('MGROUP','>f8'),('ZGROUP','>f8'),('R200','>f8'),('RPROJ','>f8'),('CEN_IND','>i8')] dtype = np.dtype(dtype) #define fiber collison galaxies result_GC = np.where(dset1['SDSS_SPECTRO_TAG'][match31] == -1)[ 0] #where in the group catalogue are the collisions collision = np.zeros(len(match31), dtype=int) collision[result_GC] = 1 #this flag==1 if this is a collision galaxy #create array to store catalogue in data = np.recarray((len(dset3), ), dtype=dtype) data.fill(-99.9) #if no value is available, set = -99.9 #create gal ID's ID = np.arange(0, len(data), 1).astype(int) #input basics data['ID'] = ID data['RA'] = dset3['ra'] data['DEC'] = dset3['dec'] data['Z'][match13] = dset1['Z'][ match31] #redshift if available from anywhere data['Z_ERR'][match13] = dset1['Z_ERR'][match31] #redshift err from sdss data['Z_TYPE'][match13] = dset1['ZTYPE'][match31] #source of reshift data['VELDISP'][match13] = dset1['VDISP'][match31] data['VELDISP_ERR'][match13] = dset1['VDISP_ERR'][match31] data['FIBERCOL'][match13] = collision #do K+E corrected ABS magnitude AQ = [-4.22, -2.04, -1.62, -1.61, -0.76] EQ = AQ[0] * (data['Z'] - 0.1) x = dset1['ABSMAG_u.nearest.model.z0.10'][match31] - EQ data['M_u,0.1'] = x EQ = AQ[1] * (data['Z'] - 0.1) x = dset1['ABSMAG_g.nearest.model.z0.10'][match31] - EQ data['M_g,0.1'] = x EQ = AQ[2] * (data['Z'] - 0.1) x = dset1['ABSMAG_r.nearest.model.z0.10'][match31] - 
EQ data['M_r,0.1'] = x EQ = AQ[3] * (data['Z'] - 0.1) x = dset1['ABSMAG_i.nearest.model.z0.10'][match31] - EQ data['M_i,0.1'] = x EQ = AQ[4] * (data['Z'] - 0.1) x = dset1['ABSMAG_z.nearest.model.z0.10'][match31] - EQ data['M_z,0.1'] = x #apply some derived quantities data['N_SERSIC'] = dset1['SERSIC_N_r'][match31] data['MSTAR'] = dset3['Mstar'] #take from Berlind data['SSFR'][match23] = dset2['MEDIAN'][match32] #add some group properties data['GROUP_ID'] = dset3['groupID'] data['MGROUP'] = np.log10(dset3['Mgroup']) #identify central galaxy in groups group_IDs = np.unique(data['GROUP_ID']) for group in group_IDs: #run through each group and identify central galaxy members = np.where(data['GROUP_ID'] == group)[0] largest_mass = np.max(data['MSTAR'][members]) #central is most massive central = np.where((data['GROUP_ID'] == group) & (data['MSTAR'] == largest_mass))[0] central = central[ 0] #should only be one of these, so make it the central data['CEN_IND'][members] = central data['ZGROUP'][members] = data['Z'][central] da = cu.spheredist(data['RA'][central], data['DEC'][central], data['RA'][members], data['DEC'][members]) chi = cosmology.funcs.comoving_distance(data['ZGROUP'][central], cosmo=cosmo) * 1000.0 #in kpc dl = cosmology.funcs.luminosity_distance(data['ZGROUP'][central], cosmo=cosmo) * 1000.0 #in kpc data['RPROJ'][members] = chi / ( 1.0 + data['ZGROUP'][members]) * da #caclulate physical seperation Omega_m = 0.3169 x = 258.1 * (10**data['MGROUP'] / (10.0**12.0))**(1.0 / 3.0) * ( Omega_m / 0.25)**(1.0 / 3.0) * (1.0 + data['ZGROUP'])**(-1.0) data['R200'] = x print 'saving hdf5 version of the catalogue...' filename = catalogue3_new f = h5py.File(savepath3 + filename + '.hdf5', 'w') dset = f.create_dataset(filename, data=data) f.close() print 'saving ascii version of the catalogue...' filename = catalogue3_new data_table = table.table.Table(data=data) ascii.write(data_table, savepath3 + filename + '.dat') print data_table
def do_clustering( alldecks, prefix="MTGTOP8", deck_guess=KLD_deck_guess, start_date=datetime.date(year=2016, month=10, day=1), timestep=7, standard_set_savename='data/standard_legal.json', forced_legal=[], card_to_check=None, ): standard_legal = [ x.lower() for x in get_standard_legal(savename=standard_set_savename, forced_legal=forced_legal) ] if card_to_check is not None: assert card_to_check in standard_legal card_namespace = set(card.lower() for dailies in alldecks.values() for deck in dailies.values() for card in deck['mainboard'] if card.lower() in standard_legal) lowercase_decks = { date: { key: { board: ({(card.lower() if '/' not in card else card.lower().split("/")[0].strip()): deck[board][card] for card in deck[board]} if board not in ('eventid', 'record') else deck[board]) for board in deck } for key, deck in results.items() } for date, results in alldecks.items() } legaldecks = { date: results for date, results in lowercase_decks.items() if not any(card.lower() not in standard_legal for deck in results.values() for card in deck['mainboard']) } illegalcards = { date: { user: ([ card for card in deck['mainboard'] if card.lower() not in standard_legal ], deck['eventid']) for user, deck in results.items() if any([ card for card in deck['mainboard'] if card.lower() not in standard_legal ]) } for date, results in lowercase_decks.items() if any(card.lower() not in standard_legal for deck in results.values() for card in deck['mainboard']) } deckcount = sum(len(x) for x in legaldecks.values()) print("Found {0} decks".format(deckcount)) array = np.recarray(deckcount, dtype=([('ID', 'S40'), ('Date', 'S10'), ('EventID', int), ("Archetype", "S25")] + [(name, np.int16) for name in card_namespace])) ii = 0 for date, daily in legaldecks.items(): for deckname, deck in daily.items(): array['ID'][ii] = deckname.encode('ascii', errors='replace') array['Date'][ii] = date array['EventID'][ii] = deck['eventid'] for card in card_namespace: if card in deck['mainboard']: array[card][ii] = int(deck['mainboard'][card]) + 10 else: array[card][ii] = 0 ii = ii + 1 pd = pandas.DataFrame(array) # get rid of the id column and date column justdata = pd.T[4:] # import skfuzzy as fuzz # # Set up the loop and plot # #fig1, axes1 = pl.subplots(3, 3, figsize=(8, 8)) # fpcs = [] # # # for ncenters in range(2,20): # cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans( # data=justdata, c=ncenters, m=2, error=0.005, maxiter=1000, init=None) # # # Store fpc values for later # fpcs.append(fpc) # # # # Plot assigned clusters, for each data point in training set # cluster_membership = np.argmax(u, axis=0) # # print() # print("Nclusters = {0}".format(ncenters)) # print() # # for ii in range(ncenters): # mask = cluster_membership==ii # deck = (justdata.T[mask] > 0).sum(axis=0) # deck.sort_values(inplace=True) # if any(k in deck.keys()[-10:] for k in easy_decks): # for k in easy_decks: # if k in deck.keys()[-10:]: # name = k # print("Deck {0}={2}: {1} matches, {3}%".format(ii, mask.sum(), name, mask.sum()/len(mask))) # else: # print("Deck {0}: {1} matches, {2}%".format(ii, mask.sum(), mask.sum()/len(mask))) # print(deck[-10:]) # # for j in range(ncenters): # # ax.plot(xpts[cluster_membership == j], # # ypts[cluster_membership == j], '.', color=colors[j]) # # # # # Mark the center of each fuzzy cluster # # for pt in cntr: # # ax.plot(pt[0], pt[1], 'rs') # # # # ax.set_title('Centers = {0}; FPC = {1:.2f}'.format(ncenters, fpc)) # # ax.axis('off') # # # #fig1.tight_layout() # # fig2, ax2 = pl.subplots() # ax2.plot(range(2,20), fpcs) 
# ax2.set_xlabel("Number of centers") # ax2.set_ylabel("Fuzzy partition coefficient") distortions = [] deck_class = { 'Panharmonicon': ['Panharmonicon'], 'Metalwork Colossus': ['Metalwork Colossus'], 'Aetherworks Marvel': ['Aetherworks Marvel'], #'BG Delirium Aggro': ['Grim Flayer', ], 'BG Delirium Control': [ "Liliana, the Last Hope", 'Grim Flayer', 'Ishkanah, Grafwidow', 'Grasp of Darkness', 'Vessel of Nascency', 'Noxious Gearhulk', 'Ruinous Path' ], 'UW Flash': [ 'Reflector Mage', "Smuggler's Copter", "Thraben Inspector", "Prairie Stream" ], 'Bux Graveyard': ['Haunted Dead', 'Prized Amalgam', 'Voldaren Pariah', 'Cryptbreaker'], 'RW Vehicle Aggro': [ "Smuggler's Copter", "Inspiring Vantage", "Pia Nalaar", "Toolcraft Exemplar", "Thraben Inspector" ], 'Mardu Vehicle Aggro': [ "Concealed Courtyard", "Scrapheap Scrounger", "Smuggler's Copter", "Inspiring Vantage", "Toolcraft Exemplar", "Thraben Inspector" ], 'Wx Humans': [ "Thalia's Lieutenant", "Thraben Inspector", "Town Gossipmonger", "Expedition Envoy", "Always Watching" ], # has a dwarf?! 'RG Energy Aggro': [ 'Servant of the Conduit', 'Attune with Aether', 'Longtusk Cub', 'Voltaic Brawler', 'Bristling Hydra' ], 'Grixis Graveyard Emerge': [ 'Elder Deep-Fiend', "Kozilek's Return", "Prized Amalgam", "Cathartic Reunion", "Haunted Dead", "Wretched Gryff" ], 'RG Pummeler': [ "Electrostatic Pummeler", 'Servant of the Conduit', 'Attune with Aether', "Blossoming Defense", "Built to Smash" ], 'RB Aggro': ['Fiery Temper', 'Bomat Courier', 'Unlicensed Disintegration'], 'UR Control': [ 'Torrential Gearhulk', 'Glimmer of Genius', 'Harnessed Lightning', 'Spirebluff Canal', 'Wandering Fumarole' ], 'UW Control': [ 'Torrential Gearhulk', 'Glimmer of Genius', 'Immolating Glare', 'Blessed Alliance' ], 'UB Control': [ 'Torrential Gearhulk', 'Glimmer of Genius', 'Grasp of Darkness', 'Liliana, the Last Hope', 'Sunken Hollow' ], } easy_decks = ('Panharmonicon', 'Metalwork Colossus', 'Aetherworks Marvel') # +1 for the "others" guess_array = np.zeros([len(deck_guess) + 1, justdata.shape[0]]) for ii, (deckname, deck) in enumerate(deck_guess.items()): for card in deck: if card.lower() not in pd.columns: print( "Deck {0} is not represented in the meta".format(deckname)) #raise ValueError("Card {0} is not real".format(card)) guess_array[ii, justdata.T.keys() == card.lower()] = deck[card] + 10 codebook, distortion = scipy.cluster.vq.kmeans( np.array(justdata.T, 'float'), guess_array) code, dist = scipy.cluster.vq.vq(np.array(justdata.T, 'int'), codebook) #cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans( # data=justdata, c=ncenters, m=2, error=0.005, maxiter=1000, init=None) pd['Distortion'] = dist # Store fpc values for later distortions.append(distortion) cluster_membership = code deck_50_pct = {} deck_counts = {} deck_top20s = {} deck_ids = {} def top20_match(x): x = list(x) if len(x) == 0: return 0 return np.sum(x) / len(x) for ii in range(len(deck_guess) + 1): mask = cluster_membership == ii deck = (justdata.T[mask] > 0).sum(axis=0) deck.sort_values(inplace=True) deck_top20s[ii] = deck name = None card_match_fraction = { dk: top20_match(k.lower() in [this_key.lower() for this_key in deck.keys()[-20:]] for k in ks) for dk, ks in deck_guess.items() } bestfrac = 0 for dk, frac in card_match_fraction.items(): if frac > bestfrac: bestfrac = frac name = dk if bestfrac < 0.7: print("### Deck {0} has bad matches {1}".format(ii, bestfrac)) name = None if name is None: print("Deck {0}: {1} matches, {2:0.2f}% of total".format( ii, mask.sum(), mask.sum() / len(mask) * 100)) 
deck_ids[ii] = ii deck_50_pct[ii] = mask.sum() / len(mask) deck_counts[ii] = mask.sum() pd.ix[mask, 'Archetype'] = "Other " + str(ii) else: if name in deck_50_pct: print("**********DUPLICATE**********") pd.ix[mask, 'Archetype'] = name + str(ii) else: pd.ix[mask, 'Archetype'] = name deck_50_pct[name] = mask.sum() / len(mask) deck_counts[name] = mask.sum() print("Deck {0}={2}: {1} matches, {3:0.2f}% of total".format( ii, mask.sum(), name, mask.sum() / len(mask) * 100)) deck_ids[name] = ii #pd['Archetype'][mask] = name #print(deck[-20:]) print(len(deck_50_pct), deck_50_pct) final = pandas.DataFrame.from_dict([ (name, deck_50_pct[name], deck_counts[name]) for name in deck_50_pct ]) final.sort_values(by=1, inplace=True) print(final.sort_values(by=1)) week_starts = [ day for day in daterange(start_date, datetime.date.today(), step=timestep) ] weekly_summary = pandas.DataFrame(index=week_starts, columns=deck_guess.keys()) dates = pandas.to_datetime([x.decode() for x in pd.Date]) dates = np.array([ datetime.date(year=2000 + int(x[6:8]), month=int(x[3:5]), day=int(x[0:2])) for x in pd.Date ]) for week_start in week_starts: week_end = week_start + datetime.timedelta(timestep) date_matches = (dates >= week_start) & (dates < week_end) for deck in deck_guess: deck_matches = (pd.Archetype == deck) & date_matches weekly_summary[deck][week_start] = deck_matches.sum( ) / date_matches.sum() weekly_summary.plot(style=[ x + 'o' + y for x, y in zip('rgbcmykrgbcmykrgbcmykrgbcmyk', ['-'] * 7 + ['--'] * 7 + [':'] * 7 + ['-.'] * 7) ], figsize=[24, 20]) pl.xlabel("First date in week") pl.ylabel("Fraction decks in that week") pl.legend(loc='center left', bbox_to_anchor=(1.05, 0.5)) pl.savefig("{prefix}_meta.png".format(prefix=prefix), bbox_inches='tight') def get_deck(num): deck = pd.loc[num] return deck[deck != 0] return pd, get_deck, deck_50_pct, deck_ids, deck_top20s, deck_counts
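# The clustering step above seeds k-means with hand-built archetype centroids. A short
# sketch of that scipy.cluster.vq pattern on toy data: when kmeans() is given an array
# instead of an integer k, it uses it as the initial codebook, and vq() then assigns
# every observation to its nearest refined centroid. The deck vectors here are invented.
import numpy as np
import scipy.cluster.vq

rng = np.random.default_rng(0)
obs = np.vstack([rng.poisson(4, (15, 5)),                   # 15 "decks" built around high counts
                 rng.poisson(1, (15, 5))]).astype(float)    # 15 built around low counts

guess = np.array([[4., 4., 4., 4., 4.],                     # one row per guessed archetype
                  [1., 1., 1., 1., 1.]])

codebook, distortion = scipy.cluster.vq.kmeans(obs, guess)  # refine the guessed centres
labels, dists = scipy.cluster.vq.vq(obs, codebook)          # per-deck cluster assignment
print(labels, round(distortion, 3))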
def _Combine60sDateSpecies(Date, Species='H', Verbose=True, Overwrite=False, DryRun=False): ''' Combines the relevant files for a given species on a given date. Inputs ======= Date : integer, format: yyyymmdd Species: string 'H','He','He2','O','Na' ''' #use species to calculate some constants mass = Globals.Constants.amu * Globals.IonMass.get(Species, Globals.IonMass['H']) e = Globals.Constants.e g = Globals.Constants.g kB = Globals.Constants.kB dOmega = Globals.Constants.dOmega eqbins0 = Globals.EQBins[0] eqbins2 = Globals.EQBins[2] if Species == 'He2': vbins0 = np.sqrt((2 * e * 2000.0 * eqbins0) / mass) vbins2 = np.sqrt((2 * e * 2000.0 * eqbins2) / mass) else: vbins0 = np.sqrt((e * 2000.0 * eqbins0) / mass) vbins2 = np.sqrt((e * 2000.0 * eqbins2) / mass) #get output dtype, file name and path OutPath = Globals.MessPath + 'FIPS/Combined/60s/{:s}/'.format(Species) if not os.path.isdir(OutPath): os.system('mkdir -pv ' + OutPath) dtype = Globals.dtype60s fname = OutPath + '{:08d}.bin'.format(Date) if os.path.isfile(fname) and not Overwrite and not DryRun: print("File {:s} exists".format(fname)) return #read in the four data files (if they exist) dS = ReadData(Date, 'espec') dN = ReadData(Date, 'ntp') dE = ReadData(Date, 'edr') dC = ReadData(Date, 'cdr') if Species == 'H': dA = ReadData(Date, 'ann') else: dA = None #check that there are any data points: if dE.size == 0 and dC.size == 0 and dS.size == 0 and dN.size == 0: return #no data found at all for this date #now we need to work out how many records there are - NTP values #don't exist for all data, so using that will cut out other spectra #might be a good idea to group up the CDR or EDR data StartMET = np.copy(dN.StartMET) StopMET = np.copy(dN.StopMET) StartInd = np.copy(dN.StartIndex) StopInd = np.copy(dN.StopIndex) nN = dN.size grouped = np.zeros(dS.size, dtype='bool') for i in range(0, nN): use = np.where((dS.Index >= StartInd[i]) & (dS.Index <= StopInd[i]))[0] grouped[use] = True #now to group up the rest notgrouped = grouped == False ng = np.where(notgrouped)[0] met = dS.MET[ng] ind = dS.Index[ng] if ng.size > 0: StM = [] SpM = [] StI = [] SpI = [] i = 0 while i < ng.size: use = np.where((met >= met[i]) & (met <= met[i] + 60.0))[0] StM.append(met[use[0]]) SpM.append(met[use[-1]]) StI.append(ind[use[0]]) SpI.append(ind[use[-1]]) i = use[-1] + 1 StartMET = np.append(StartMET, np.array(StM)) StopMET = np.append(StopMET, np.array(SpM)) StartInd = np.append(StartInd, np.array(StI)) StopInd = np.append(StopInd, np.array(SpI)) srt = np.argsort(StartMET) StartMET = StartMET[srt] StopMET = StopMET[srt] StartInd = StartInd[srt] StopInd = StopInd[srt] #now we should have grouped all of the data, time to create the output array n = np.size(StartMET) if n == 0: print('no data') return out = np.recarray(n, dtype=dtype) #save some ion info spstr = Species + (3 - (len(Species))) * ' ' out.Ion = spstr out.Mass = mass #save ut and MET met0 = dC.MET[0] - dC.ut[0] * 3600.0 #MET at the start of the day out.Date = Date out.MET = StopMET out.ut = (out.MET - met0) / 3600.0 out.StartIndex = StartInd out.StopIndex = StopInd #continuous ut out.utc = ContUT(out.Date, out.ut) #position pos = GetPosition(Date) if pos.size > 0: fx = interp1d(pos.ut, pos.x, kind='cubic', bounds_error=False, fill_value='extrapolate') fy = interp1d(pos.ut, pos.y, kind='cubic', bounds_error=False, fill_value='extrapolate') fz = interp1d(pos.ut, pos.z, kind='cubic', bounds_error=False, fill_value='extrapolate') out.x = fx(out.ut) out.y = fy(out.ut) out.z = fz(out.ut) else: pos.x = 
np.nan pos.y = np.nan pos.z = np.nan #location out.Loc = GetRegion(out.Date, out.ut, out.utc, Verbose=False) #set default CDR quality flag #Normally 0 = good, 1 = bad, here -1 = not present out.CDRQuality[:] = -1 out.NTPQuality[:] = -1 #match ut with ANN output and get ANN outputs out.Class[:] = -1 out.SplitClass[:, :] = -1 out.Prob[:] = np.nan out.SplitProb[:, :] = np.nan out.nk[:] = np.nan out.tk[:] = np.nan out.pk[:] = np.nan out.k[:] = np.nan if not dA is None: if dA.size > 0: Imatch, _ = MatchUT(out.ut, dA.ut) ngood = np.sum(Imatch > -1) if ngood == dA.size: if Verbose: print('ANN data match') elif ngood < dA.size: print('WARNING: missing {:d} ANN points'.format(dA.size - ngood)) else: print( 'WARNING: too many matches, something really bad has happened!' ) for i in range(0, Imatch.size): if Imatch[i] > -1: out.Class[i] = dA.Class[Imatch[i]] out.SplitClass[i] = dA.SplitClass[Imatch[i]] out.Prob[i] = dA.Prob[Imatch[i]] out.SplitProb[i] = dA.SplitProb[Imatch[i]] out.nk[i] = dA.nk[Imatch[i]] out.tk[i] = dA.tk[Imatch[i]] out.k[i] = dA.K[Imatch[i]] out.pk[i] = out.nk[i] * 1e6 * kB * out.tk[i] * 1e6 * 1e9 #get the appropriate flux Flux = dS[Species + 'Flux'] #loop through groups for i in range(0, n): if Verbose: print('\rCopying data {:f}%'.format(100.0 * (i + 1) / n), end='') #get the METS from ESPEC first, the rest have to match this! useS = np.where((dS.Index >= StartInd[i]) & (dS.Index <= StopInd[i]))[0] METS = dS.MET[useS] out[i].StartMET = METS[0] out[i].StopMET = METS[-1] out[i].MET = METS[-1] #now find the other indices by using the MET list useE = np.where(InArray(dE.MET, METS))[0] useC = np.where(InArray(dC.MET, METS))[0] #useE = np.where((dE.MET >= StartMET[i]) & (dE.MET <= StopMET[i]))[0] #useC = np.where((dC.MET >= StartMET[i]) & (dC.MET <= StopMET[i]))[0] useN = np.where(dN.StartIndex == StartInd[i])[0] #get NSpec out[i].NSpec = useS.size #set E/Q and V bins if useE.size == 0: out[i].ScanType = -1 out[i].EQBins = eqbins0 out[i].Tau = 0.095 else: out[i].ScanType = stats.mode(dE[useE].ScanType)[0][0] if out[i].ScanType == 0: out[i].EQBins = eqbins0 out[i].VBins = vbins0 / 1000.0 out[i].Tau = 0.095 else: out[i].EQBins = eqbins2 out[i].VBins = vbins2 / 1000.0 out[i].Tau = 0.005 #copy counts across,summing over spectra (proton counts only here) if useE.size > 0 and Species == 'H': out[i].Counts = np.sum(dE.ProtonRate[useE], 0) else: out[i].Counts[:] = 0 #now to move the fluxes over from ESPEC if useS.size > 0: out[i].Flux = np.nanmean(Flux[useS], 0) #calculate PSD out[i].PSD = out[i].Flux * (mass / (out[i].VBins**2)) * (10.0 / e) #save the quality flags if useC.size > 0 and Species == 'H': out[i].CDRQuality[:useC.size] = dC[useC].Quality #input NTP values if they exist out.HasNTP[i] = False out.n[i] = np.nan out.t[i] = np.nan out.p[i] = np.nan if useN.size > 0 and Species == 'H': #currently this only exists for H out.n[i] = dN[useN[0]].n out.t[i] = dN[useN[0]].t out.p[i] = dN[useN[0]].p out.HasNTP[i] = True out.NTPQuality[i] = dN[useN[0]].Quality if Verbose: print() #This following bit will only work for protons currently, for all other ions Eff = 1 if Species == 'H': #calculate efficiencies Tau2 = np.array([5] * 52 + [0] * 12) / 1000.0 Tau0 = np.array([95] * 60 + [0] * 4) / 1000.0 Eff = np.zeros((n, 64), dtype='float32') for i in range(0, n): if Verbose: print('\rCalculating Efficiencies {:f}%'.format(100.0 * (i + 1) / n), end='') if out[i].ScanType == 0: Ebins = eqbins0 Tau = Tau0 else: Ebins = eqbins2 Tau = Tau2 zero = np.where(out[i].Counts == 0)[0] Eff[i] = 
_CalculateProtonEff(Ebins, Tau, out[i].Flux, out[i].Counts) Eff[i][zero] = np.nan if np.size(Eff.shape) == 2: Eff = np.nanmean(Eff, 0) Eff[np.isfinite(Eff) == False] = np.nan else: Tau2 = np.array([5] * 52 + [0] * 12) / 1000.0 Tau0 = np.array([95] * 60 + [0] * 4) / 1000.0 Eff = np.zeros((n, 64), dtype='float32') for i in range(0, n): if Verbose: print('\rCalculating Efficiencies {:f}%'.format(100.0 * (i + 1) / n), end='') if out[i].ScanType == 0: Ebins = eqbins0 Tau = Tau0 else: Ebins = eqbins2 Tau = Tau2 zero = np.where(out[i].Counts == 0)[0] Eff[i] = Tau * 1.0 Eff[i][zero] = np.nan if np.size(Eff.shape) == 2: Eff = np.nanmean(Eff, 0) Eff[np.isfinite(Eff) == False] = np.nan if Verbose: print() if Species == 'H': #attempt to refit the spectrum with a kappa distribution for i in range(0, n): if Verbose: print('\rRefitting Spectra {:f}%'.format(100.0 * (i + 1) / n), end='') #save efficiency out[i].Efficiency[:] = Eff #changed this bit so that fitting only happens if n,T,K haven't already been defined in ANN data if np.isnan(out[i].nk): #set starting guess for n and T based on original fits if they exist if np.isnan(out[i].n): n0 = 2.0e6 T0 = 10.0e6 else: n0 = out[i].n * 1e6 T0 = out[i].t * 1e6 #now try fitting nTK = FitKappaDistCts(out.VBins[i] * 1000.0, out.Counts[i], n0, T0, dOmega, mass, Eff, out[i].NSpec, out[i].Tau, g) #check that the values are all positive at least if nTK[0] > 0 and nTK[1] > 0 and nTK[2] > 0: out[i].nk = nTK[0] / 1e6 out[i].tk = nTK[1] / 1e6 out[i].k = nTK[2] out[i].pk = nTK[0] * kB * nTK[1] * 1e9 else: out[i].nk = np.nan out[i].tk = np.nan out[i].k = np.nan out[i].pk = np.nan if Verbose: print() if out.size > 0 and not DryRun: RT.SaveRecarray(out, fname) return out
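# Sketch of the 60-second grouping pass used above for spectra that fall outside the NTP
# groups: walk the sorted MET values and bundle everything within 60 s of the current
# record into one group. The timestamps below are invented.
import numpy as np

met = np.array([0.0, 10.0, 30.0, 70.0, 75.0, 140.0, 150.0, 155.0])  # seconds, sorted

starts, stops = [], []
i = 0
while i < met.size:
    use = np.where((met >= met[i]) & (met <= met[i] + 60.0))[0]
    starts.append(met[use[0]])
    stops.append(met[use[-1]])
    i = use[-1] + 1

print(list(zip(starts, stops)))   # [(0.0, 30.0), (70.0, 75.0), (140.0, 155.0)]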
def predictIterative(printFlag): #TODO: remove / hardcode the following parameters # remove any superfluous ... # Parameters numVotes = 11 usePos = True # True/False: limit to only Positive coefficient values # Label for pos & neg labels pLabel = 1 nLabel = 0 negMultiplier = 1 # LASSO params lMaxIter = 1000 lNorm = True lFitIcpt = True useFeatPathZScore = True # True/False: use the pathsim sum features fZScoreSim = 'features_ZScoreSim.gz' # File name containing path z-score vectors useFeatTermWeights = True # True/False: use the indirect term features useFeatNeighbor = False # True/False: use the neighborhood features useGivenRange = np.linspace(0.00001, 0.05, num=27) # array of vals; 'None' means to auto-search for alphas # Control the iterations & error numVotes = 11 # how many random samples for comparison retrySubPortion = 0.75 # how many of Known to keep in new sub-sample retryMinValid = 9 # minimum Known genes to use for PosTrain if printFlag: print("\nPerforming regression(s) on {}".format(sDir)) # 0) Create the useLabel variable # string: label for the output files # ie: ClusVote_c<Las/Enet/Log/SVM><P for Pos>_f<P for pathsim><Z for z-score> # <T for term weights><N for neighborhood>_m<neg sample size multiplier> useLabel = 'Vote{}_cLas'.format(numVotes) if usePos: useLabel = useLabel + 'P' useLabel = useLabel + '_f' if useFeatPathZScore: useLabel = useLabel + 'Z' if useFeatTermWeights: useLabel = useLabel + 'T' if useFeatNeighbor: useLabel = useLabel + 'N' useLabel = useLabel + '_m{}'.format(negMultiplier) if printFlag: print("Using label: {}".format(useLabel)) # 1) Load the gene-index dictionary & path names geneDict, pathDict = cl.getGeneAndPathDict(sDir) geneNames = list(geneDict.keys()) geneNames.sort() pathNames = cl.removeInvertedPaths(pathDict) del pathDict # 2) Load the network general features numFN = 0 if useFeatNeighbor: featNbVals, featNbNames = cl.getFeaturesNeighborhood(sDir, 'LogScale') featNbNames = np.ravel(featNbNames) numFN = len(featNbNames) #end if numTW = 0 if useFeatTermWeights: featTWVals, featTWNames = cl.getFeaturesTerms(sDir, 'Orig') featTWNames = np.ravel(featTWNames) numTW = len(featTWNames) #end if # 3) Loop over the list of the sample subdirectories dSubDirs = cl.getSubDirectoryList(sDir) thisRound = 0 for si in dSubDirs: thisRound += 1 # Display directory to examine sv = si.split('/') if printFlag: print("\n{}/{}/".format(sv[-3], sv[-2])) # Create index lists for Known, Hidden, Unknown, TrueNeg from files giKnown, giUnknown, giHidden, giTrueNeg = cl.getGeneIndexLists( si, geneDict) giAll = list() giAll.extend(giKnown) giAll.extend(giUnknown) giAll.sort() # 4) Load the sample-specific features numFP = 0 # z-score of path counts features if useFeatPathZScore: featZSVals = np.loadtxt(si + fZScoreSim) featZSVals = featZSVals[:, 0:len(pathNames)] featZSNames = pathNames numFP = len(featZSNames) #end if # 5) Combine the features as specified by parameters (useFeat...) features = np.zeros((len(geneDict), 0), dtype=np.float32) featNames = list() if useFeatPathZScore: if printFlag: print(" ... including path z-score features") features = np.hstack((features, featZSVals)) featNames.extend(featZSNames) if useFeatNeighbor: if printFlag: print(" ... including neighborhood features") features = np.hstack((features, featNbVals)) featNames.extend(np.ravel(featNbNames)) if useFeatTermWeights: # Remove terms with no connection to gene set sumFTV = np.sum(featTWVals[giKnown, :], axis=0) keepIdx = np.nonzero(sumFTV) numTW = len(keepIdx[0]) if printFlag: print(" ... 
including term membership features") features = np.hstack((features, featTWVals[:, keepIdx[0]])) featNames.extend(np.ravel(featTWNames[keepIdx])) # verify some features have been loaded numFeatAll = len(featNames) if features.shape[1] == 0: print("ERROR: No features were specified for classification.") sys.exit #end if # Normalize the feature values features = cl.normalizeFeatureColumns(features) # Create the structure to rank the Unknown genes & paths geneRanks = np.zeros((len(geneDict), 1), dtype=np.int32) geneScores = np.zeros((len(geneDict), 1), dtype=np.float32) #TODO: How to save feature rankings ?? voteScores = np.zeros((len(giAll), numVotes), dtype=np.float32) # voteScores = np.zeros( (len(geneDict), numVotes), dtype=np.float32) if printFlag: print("{} votes; known: {}, total: {}, trainSet: {}".format( numVotes, len(giKnown), len(giAll), (len(giKnown) * (1 + negMultiplier)))) #end if # Store how many samples use certain features featT1List = np.zeros((numFeatAll, 1), dtype=np.int16) featT5List = np.zeros((numFeatAll, 1), dtype=np.int16) featTAList = np.zeros((numFeatAll, 1), dtype=np.int16) # featT1Dict = dict() # featT1Set = set() # featT5Dict = dict() # featT5Set = set() # featTADict = dict() # featTASet = set() # 6) Prepare the test/train vectors & labels # Extract the vectors for the pos sets retrySubSample = False retries = 0 vote = 0 while vote < numVotes: if len(giKnown) < retryMinValid: retrySubSample = False if retrySubSample: retrySubSample = False numSubSample = int(numSubSample * retrySubPortion) + 1 retryIterKnown = random.sample(giKnown, numSubSample) if len(retryIterKnown) < retryMinValid: retryIterKnown = random.sample(giKnown, retryMinValid) posTrain = features[retryIterKnown, :] posTrainLabel = np.ones((len(retryIterKnown), 1)) * pLabel nExamples = min(negMultiplier * len(retryIterKnown), (len(giAll) - len(retryIterKnown))) else: giKnown = giKnown numSubSample = len(giKnown) posTrain = features[giKnown, :] posTrainLabel = np.ones((len(giKnown), 1)) * pLabel nExamples = min(negMultiplier * len(giKnown), (len(giAll) - len(giKnown))) #end if # Extract the vectors for neg sets # as one-class: train with rand samp from Unknown # test with all Unknown (TrueNeg + Hidden/TruePos) giTrainNeg = random.sample(giUnknown, nExamples) negTrain = features[giTrainNeg, :] negTrainLabel = np.ones((len(giTrainNeg), 1)) * nLabel # Combine to create the full train & test data sets # as one-class: trainSet = np.vstack((posTrain, negTrain)) trainLabel = np.vstack((posTrainLabel, negTrainLabel)) testSet = features[giAll, :] # Some versions want the labels reshaped trainLabel = np.reshape(trainLabel, [ trainLabel.shape[0], ]) # 7) Train classifier, predict on test, collect scores cfier = lm.LassoCV(alphas=useGivenRange, positive=usePos, max_iter=lMaxIter, normalize=lNorm, fit_intercept=lFitIcpt) cfier.fit(trainSet, trainLabel) foundAlpha = cfier.alpha_ if printFlag: print( " Vote {} of {}; iters {:3d}, alpha {:.5f}, score {:.3f}; coeffs {}; sample {}" .format((vote + 1), numVotes, cfier.n_iter_, foundAlpha, cfier.score(trainSet, trainLabel), len(np.nonzero(cfier.coef_)[0]), len(posTrainLabel))) #end if cfPredLabel = cfier.predict(testSet) cfPredLabel = np.ravel(cfPredLabel) # If no coeffs (train score == 0) try again if len(np.nonzero(cfier.coef_)[0]) <= 0: if retries < (numVotes * 5): retrySubSample = True vote = vote - 1 retries += 1 else: if printFlag: print("WARNING: used all retries.") # Else, collect info about the top-used features else: numSubSample = len(giKnown) # Extract indices 
corresponding to top 5 weighted features featWeights = cfier.coef_ numFeats = len(np.nonzero(featWeights)[0]) topFeats = np.ones((numFeats), dtype=np.int32) * (-1) for num in range(numFeats): featIdx = np.argmax(featWeights) topFeats[num] = featIdx featWeights[featIdx] = 0 #end loop # Increment count for the Top 1 path featT1List[topFeats[0]] += 1 # Increment count for the Top 5 paths for num in range(5): if numFeats <= num: break #end if featT5List[topFeats[num]] += 1 #end loop # Increment count for all non-zero paths for num in range(numFeats): if numFeats <= num: break #end if featTAList[topFeats[num]] += 1 #end loop #end if voteScores[:, vote] = cfPredLabel vote += 1 #end loop (vote) # 8) Place the scores into the array and store across iterations # first, average across the random negative samples (votes) #TODO: really, I should either normalize the score or vote across rank voteScores = cl.normalizeFeatureColumns(voteScores) voteAvgScore = np.mean(voteScores, axis=1) voteUnknownScore = voteAvgScore[giUnknown] ranker = np.recarray(len(giUnknown), dtype=[('inverse', 'f4'), ('score', 'f4'), ('geneIdx', 'i4')]) ranker['score'] = voteUnknownScore ranker['inverse'] = np.multiply(voteUnknownScore, -1) ranker['geneIdx'] = giUnknown ranker.sort(order=['inverse', 'geneIdx']) # 11) Output the ranked genes to file # write the file fname = 'ranked_genes-' + useLabel + '_Avg.txt' if printFlag: print(" Saving ranked genes to file {}".format(fname)) with open(si + fname, 'w') as fout: firstRow = True for row in range(len(ranker)): if not firstRow: fout.write('\n') fout.write('{:3.3f}{}{}'.format( ranker['score'][row], '\t', geneNames[ranker['geneIdx'][row]])) firstRow = False #end with # 12) Output the selected feature info to file #TODO: this # Sort the Top 1 paths featT1Sort = np.recarray(numFeatAll, dtype=[('featIdx', 'i4'), ('count', 'i4')]) for row in range(numFeatAll): featT1Sort['featIdx'][row] = row featT1Sort['count'][row] = featT1List[row] #end if featT1Sort[::-1].sort(order=['count', 'featIdx']) # Save the Top 1 paths to file fname = 'ranked_features_Top1-' + useLabel + '.txt' with open(si + fname, 'w') as fout: fout.write('Votes:{}{}'.format('\t', numVotes)) row = 0 nextVal = featT1Sort['count'][row] while nextVal != 0: fout.write('\n{}{}{}'.format( nextVal, '\t', featNames[featT1Sort['featIdx'][row]])) row += 1 nextVal = featT1Sort['count'][row] #end with # Sort the Top 5 paths featT5Sort = np.recarray(numFeatAll, dtype=[('featIdx', 'i4'), ('count', 'i4')]) for row in range(numFeatAll): featT5Sort['featIdx'][row] = row featT5Sort['count'][row] = featT5List[row] #end if featT5Sort[::-1].sort(order=['count', 'featIdx']) # Save the Top 5 paths to file fname = 'ranked_features_Top5-' + useLabel + '.txt' with open(si + fname, 'w') as fout: fout.write('Votes:{}{}'.format('\t', numVotes)) row = 0 nextVal = featT5Sort['count'][row] while nextVal != 0: fout.write('\n{}{}{}'.format( nextVal, '\t', featNames[featT5Sort['featIdx'][row]])) row += 1 nextVal = featT5Sort['count'][row] #end with # Sort the Top All Non-Zero paths featTASort = np.recarray(numFeatAll, dtype=[('featIdx', 'i4'), ('count', 'i4')]) for row in range(numFeatAll): featTASort['featIdx'][row] = row featTASort['count'][row] = featTAList[row] #end if featTASort[::-1].sort(order=['count', 'featIdx']) # Save the Top All Non-Zero paths to file fname = 'ranked_features_TopNZ-' + useLabel + '.txt' with open(si + fname, 'w') as fout: fout.write('Votes:{}{}'.format('\t', numVotes)) row = 0 nextVal = featTASort['count'][row] while nextVal 
!= 0: fout.write('\n{}{}{}'.format( nextVal, '\t', featNames[featTASort['featIdx'][row]])) row += 1 nextVal = featTASort['count'][row] #end with # 13) Output the parameters to file fname = 'parameters-' + useLabel + '.txt' with open(si + fname, 'w') as fout: fout.write('\n') fout.write('Sampling Method for Neg examples\n') fout.write( ' as One-Class w/ iterations on the weaker predictions\n') fout.write('\n') fout.write('Features Used\n') fout.write('path Z-Score:{}{}\n'.format('\t', useFeatPathZScore)) fout.write('Neighborhood:{}{}\n'.format('\t', useFeatNeighbor)) fout.write('Term Weights:{}{}\n'.format('\t', useFeatTermWeights)) fout.write('\n') #TODO: collect some stats (ie: common alphas, l1 ratios, etc) fout.write('Classifier Parameters\n') fout.write('method:{}Lasso\n'.format('\t')) fout.write('positive:{}{}\n'.format('\t', usePos)) fout.write('alpha range:{}{}\n'.format('\t', useGivenRange)) fout.write('alpha chosen:{}{}\n'.format('\t', cfier.alpha_)) fout.write('max_iter:{}{}\n'.format('\t', lMaxIter)) fout.write('normalize:{}{}\n'.format('\t', lNorm)) fout.write('fit_intercept:{}{}\n'.format('\t', lFitIcpt)) fout.write('\n') #end with if printFlag: print("--{} of {}".format(thisRound, len(dSubDirs)))
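# Sketch of the ranking recarray used above: sorting on a negated copy of the score plus
# the gene index yields a descending-by-score order with a deterministic tiebreak. The
# scores and indices below are invented.
import numpy as np

scores = np.array([0.2, 0.9, 0.9, 0.1], dtype='f4')
gene_idx = np.array([7, 3, 1, 9], dtype='i4')

ranker = np.recarray(len(scores), dtype=[('inverse', 'f4'), ('score', 'f4'), ('geneIdx', 'i4')])
ranker['score'] = scores
ranker['inverse'] = np.multiply(scores, -1)   # ascending on -score == descending on score
ranker['geneIdx'] = gene_idx
ranker.sort(order=['inverse', 'geneIdx'])

print(ranker.geneIdx)   # [1 3 7 9]: the tied 0.9 scores are ordered by gene index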
def calc_eccentricity(args, options):
    table = os.path.join(args[0], 'table2.dat')
    readme = os.path.join(args[0], 'ReadMe')
    dierickx = ascii.read(table, readme=readme)
    vxvv = np.dstack([dierickx['RAdeg'], dierickx['DEdeg'], dierickx['Dist'] / 1e3,
                      dierickx['pmRA'], dierickx['pmDE'], dierickx['HRV']])[0]
    ro, vo, zo = 8., 220., 0.025
    ra, dec = vxvv[:, 0], vxvv[:, 1]
    lb = bovy_coords.radec_to_lb(ra, dec, degree=True)
    pmra, pmdec = vxvv[:, 3], vxvv[:, 4]
    pmllpmbb = bovy_coords.pmrapmdec_to_pmllpmbb(pmra, pmdec, ra, dec, degree=True)
    d, vlos = vxvv[:, 2], vxvv[:, 5]
    rectgal = bovy_coords.sphergal_to_rectgal(lb[:, 0], lb[:, 1], d, vlos,
                                              pmllpmbb[:, 0], pmllpmbb[:, 1], degree=True)
    vsolar = np.array([-10.1, 4.0, 6.7])
    vsun = np.array([0., 1., 0.]) + vsolar / vo
    X = rectgal[:, 0] / ro
    Y = rectgal[:, 1] / ro
    Z = rectgal[:, 2] / ro
    vx = rectgal[:, 3] / vo
    vy = rectgal[:, 4] / vo
    vz = rectgal[:, 5] / vo
    Rphiz = bovy_coords.XYZ_to_galcencyl(X, Y, Z, Zsun=zo / ro)
    vRvTvz = bovy_coords.vxvyvz_to_galcencyl(vx, vy, vz, Rphiz[:, 0], Rphiz[:, 1], Rphiz[:, 2],
                                             vsun=vsun, Xsun=1., Zsun=zo / ro, galcen=True)
    # do the integration and individual analytic estimate for each object
    ts = np.linspace(0., 20., 10000)
    lp = LogarithmicHaloPotential(normalize=1.)
    e_ana = np.zeros(len(vxvv))
    e_int = np.zeros(len(vxvv))
    print('Performing orbit integration and analytic parameter estimates for Dierickx et al. sample...')
    for i in tqdm(range(len(vxvv))):
        # build the orbit outside the try block so the numerical integration still runs
        # when only the analytic estimate fails
        orbit = Orbit(vxvv[i], radec=True, vo=220., ro=8.)
        try:
            e_ana[i] = orbit.e(analytic=True, pot=lp, c=True)
        except UnboundError:
            e_ana[i] = np.nan
        orbit.integrate(ts, lp)
        e_int[i] = orbit.e(analytic=False)
    fig = plt.figure()
    fig.set_size_inches(1.5 * columnwidth, 1.5 * columnwidth)
    plt.scatter(e_int, e_ana, s=1, color='Black', lw=0.)
    plt.xlabel(r'$\mathrm{galpy\ integrated}\ e$')
    plt.ylabel(r'$\mathrm{galpy\ analytic}\ e$')
    plt.xlim(0., 1.)
    plt.ylim(0., 1.)
    fig.tight_layout()
    plt.savefig(os.path.join(args[0], 'dierickx-integratedeanalytice.png'), format='png', dpi=200)
    fig = plt.figure()
    fig.set_size_inches(1.5 * columnwidth, 1.5 * columnwidth)
    plt.hist(e_int, bins=30)
    plt.xlim(0., 1.)
    plt.xlabel(r'$\mathrm{galpy}\ e$')
    fig.tight_layout()
    plt.savefig(os.path.join(args[0], 'dierickx-integratedehist.png'), format='png', dpi=200)
    fig = plt.figure()
    fig.set_size_inches(1.5 * columnwidth, 1.5 * columnwidth)
    plt.scatter(dierickx['e'], e_int, s=1, color='Black', lw=0.)
    plt.xlabel(r'$\mathrm{Dierickx\ et\ al.}\ e$')
    plt.ylabel(r'$\mathrm{galpy\ integrated}\ e$')
    plt.xlim(0., 1.)
    plt.ylim(0., 1.)
    fig.tight_layout()
    plt.savefig(os.path.join(args[0], 'dierickx-integratedee.png'), format='png', dpi=200)
    fig = plt.figure()
    fig.set_size_inches(1.5 * columnwidth, 1.5 * columnwidth)
    plt.scatter(dierickx['e'], e_ana, s=1, color='Black', lw=0.)
    plt.xlabel(r'$\mathrm{Dierickx\ et\ al.}\ e$')
    plt.ylabel(r'$\mathrm{galpy\ estimated}\ e$')
    plt.xlim(0., 1.)
    plt.ylim(0., 1.)
    fig.tight_layout()
    plt.savefig(os.path.join(args[0], 'dierickx-analyticee.png'), format='png', dpi=200)
    arr = np.recarray(len(e_ana), dtype=[('analytic_e', float), ('integrated_e', float)])
    arr['analytic_e'] = e_ana
    arr['integrated_e'] = e_int
    # pickle needs a binary file handle; the redundant close() after the with-block is dropped
    with open(os.path.join(args[0], 'eccentricities.dat'), 'wb') as f:
        pickle.dump(arr, f)
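# The eccentricity recarray above is serialised with pickle, which requires a binary file
# handle (hence 'wb'). A minimal round-trip sketch with invented values; the field names
# follow the function above, the file name is arbitrary.
import pickle
import numpy as np

arr = np.recarray(3, dtype=[('analytic_e', float), ('integrated_e', float)])
arr['analytic_e'] = [0.10, 0.50, 0.90]
arr['integrated_e'] = [0.12, 0.48, 0.91]

with open('eccentricities.dat', 'wb') as f:
    pickle.dump(arr, f)
with open('eccentricities.dat', 'rb') as f:
    back = pickle.load(f)
print(back.analytic_e)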
def _Combine10sDateSpecies(Date, Species='H', Verbose=True, Overwrite=False): ''' Combines the relevant files for a given species on a given date. Inputs ======= Date : integer, format: yyyymmdd Species: string 'H','He','He2','O','Na' ''' #use species to calculate some constants mass = Globals.Constants.amu * Globals.IonMass.get(Species, Globals.IonMass['H']) e = Globals.Constants.e g = Globals.Constants.g kB = Globals.Constants.kB dOmega = Globals.Constants.dOmega eqbins0 = Globals.EQBins[0] eqbins2 = Globals.EQBins[2] if Species == 'He2': vbins0 = np.sqrt((2 * e * 2000.0 * eqbins0) / mass) vbins2 = np.sqrt((2 * e * 2000.0 * eqbins2) / mass) else: vbins0 = np.sqrt((e * 2000.0 * eqbins0) / mass) vbins2 = np.sqrt((e * 2000.0 * eqbins2) / mass) #get output dtype, file name and path OutPath = Globals.MessPath + 'FIPS/Combined/10s/{:s}/'.format(Species) if not os.path.isdir(OutPath): os.system('mkdir -pv ' + OutPath) dtype = Globals.dtype10s fname = OutPath + '{:08d}.bin'.format(Date) if os.path.isfile(fname) and not Overwrite: print("File {:s} exists".format(fname)) return #read in the four data files (if they exist) dS = ReadData(Date, 'espec') dN = ReadData(Date, 'ntp') dE = ReadData(Date, 'edr') dC = ReadData(Date, 'cdr') #check that there are any data points: if dE.size == 0 and dC.size == 0 and dS.size == 0 and dN.size == 0: return #no data found at all for this date #find number of record using either EDR/CDR (for H) or ESPEC (for everything else) if Species == 'H': n = dC.size MET = dC.MET Index = np.arange(dC.size) else: n = dS.size MET = dS.MET Index = dS.Index #now time to create the output array out = np.recarray(n, dtype=dtype) #save some ion info spstr = Species + (3 - (len(Species))) * ' ' out.Ion = spstr out.Mass = mass #save ut and MET met0 = dC.MET[0] - dC.ut[0] * 3600.0 #MET at the start of the day out.Date = Date out.Index = Index out.MET = MET out.ut = (out.MET - met0) / 3600.0 #continuous ut out.utc = ContUT(out.Date, out.ut) #position pos = GetPosition(Date) if pos.size > 0: fx = interp1d(pos.ut, pos.x, kind='cubic', bounds_error=False, fill_value='extrapolate') fy = interp1d(pos.ut, pos.y, kind='cubic', bounds_error=False, fill_value='extrapolate') fz = interp1d(pos.ut, pos.z, kind='cubic', bounds_error=False, fill_value='extrapolate') out.x = fx(out.ut) out.y = fy(out.ut) out.z = fz(out.ut) else: pos.x = np.nan pos.y = np.nan pos.z = np.nan #location out.Loc = GetRegion(out.Date, out.ut, out.utc, Verbose=False) #set default CDR quality flag #Normally 0 = good, 1 = bad, here -1 = not present out.CDRQuality[:] = -1 out.NTPQuality[:] = -1 #get the appropriate flux Flux = dS[Species + 'Flux'] #loop through groups for i in range(0, n): if Verbose: print('\rCopying data {:f}%'.format(100.0 * (i + 1) / n), end='') #get the METS from ESPEC first, the rest have to match this! 
if Species == 'H': useS = np.where(dS.Index == Index[i])[0] useE = np.array([i]) useC = np.array([i]) else: useS = np.array([i]) useC = np.where(dC.Index == Index[i])[0] useE = useC #useC = np.where((dC.MET >= StartMET[i]) & (dC.MET <= StopMET[i]))[0] useN = np.where((dN.StartIndex <= Index[i]) & (dN.StopIndex >= Index[i]))[0] #set E/Q and V bins if useE.size == 0: out[i].ScanType = -1 out[i].EQBins = eqbins0 out[i].Tau = 0.095 else: out[i].ScanType = stats.mode(dE[useE].ScanType)[0][0] if out[i].ScanType == 0: out[i].EQBins = eqbins0 out[i].VBins = vbins0 / 1000.0 out[i].Tau = 0.095 else: out[i].EQBins = eqbins2 out[i].VBins = vbins2 / 1000.0 out[i].Tau = 0.005 #copy counts across,summing over spectra (proton counts only here) if useE.size > 0 and Species == 'H': out[i].Counts = np.sum(dE.ProtonRate[useE], 0) else: out[i].Counts[:] = 0 #now to move the fluxes over from ESPEC if useS.size > 0: out[i].Flux = Flux[useS[0]] #calculate PSD out[i].PSD = out[i].Flux * (mass / (out[i].VBins**2)) * (10.0 / e) #save the quality flags if useC.size > 0: out[i].CDRQuality = dC[useC[0]].Quality #input NTP values if they exist out.HasNTP[i] = False out.n[i] = np.nan out.t[i] = np.nan out.p[i] = np.nan if useN.size > 0 and Species == 'H': #currently this only exists for H out.n[i] = dN[useN[0]].n out.t[i] = dN[useN[0]].t out.p[i] = dN[useN[0]].p out.HasNTP[i] = True out.NTPQuality[i] = dN[useN[0]].Quality if Verbose: print() #This following bit will only work for protons currently, for all other ions Eff = 1 if Species == 'H': #calculate efficiencies Tau2 = np.array([5] * 52 + [0] * 12) / 1000.0 Tau0 = np.array([95] * 60 + [0] * 4) / 1000.0 Eff = np.zeros((n, 64), dtype='float32') for i in range(0, n): if Verbose: print('\rCalculating Efficiencies {:f}%'.format(100.0 * (i + 1) / n), end='') if out[i].ScanType == 0: Ebins = eqbins0 Tau = Tau0 else: Ebins = eqbins2 Tau = Tau2 zero = np.where(out[i].Counts == 0)[0] Eff[i] = _CalculateProtonEff(Ebins, Tau, out[i].Flux, out[i].Counts) nf = np.where(np.isfinite(Eff[i]) == False)[0] Eff[i][nf] = np.nan Eff[i][zero] = np.nan if np.size(Eff.shape) == 2: Eff = np.nanmean(Eff, 0) Eff[np.isfinite(Eff) == False] = np.nan else: Tau2 = np.array([5] * 52 + [0] * 12) / 1000.0 Tau0 = np.array([95] * 60 + [0] * 4) / 1000.0 Eff = np.zeros((n, 64), dtype='float32') for i in range(0, n): if Verbose: print('\rCalculating Efficiencies {:f}%'.format(100.0 * (i + 1) / n), end='') if out[i].ScanType == 0: Ebins = eqbins0 Tau = Tau0 else: Ebins = eqbins2 Tau = Tau2 zero = np.where(out[i].Counts == 0)[0] nf = np.where(np.isfinite(Eff) == False)[0] Eff[i] = Tau * 1.0 Eff[i][zero] = np.nan Eff[i][nf] = np.nan if np.size(Eff.shape) == 2: Eff = np.nanmean(Eff, 0) Eff[np.isfinite(Eff) == False] = np.nan if Verbose: print() if Species == 'H': #attempt to refit the spectrum with a kappa distribution for i in range(0, n): if Verbose: print('\rRefitting Spectra {:f}%'.format(100.0 * (i + 1) / n), end='') #save efficiency out[i].Efficiency[:] = Eff #set starting guess for n and T based on original fits if they exist if np.isnan(out[i].n): n0 = 2.0e6 T0 = 10.0e6 else: n0 = out[i].n * 1e6 T0 = out[i].t * 1e6 #now try fitting nTK = FitKappaDistCts(out.VBins[i] * 1000.0, out.Counts[i], n0, T0, dOmega, mass, Eff, 1, out[i].Tau, g) #check that the values are all positive at least if nTK[0] > 0 and nTK[1] > 0 and nTK[2] > 0: out[i].nk = nTK[0] / 1e6 out[i].tk = nTK[1] / 1e6 out[i].k = nTK[2] out[i].pk = nTK[0] * kB * nTK[1] * 1e9 else: out[i].nk = np.nan out[i].tk = np.nan out[i].k = np.nan 
out[i].pk = np.nan if Verbose: print() if out.size > 0: RT.SaveRecarray(out, fname) return out
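# Both _Combine*sDateSpecies functions interpolate the spacecraft position onto the output
# time axis with a cubic interp1d and fill_value='extrapolate', so times slightly outside
# the ephemeris range still receive a value. A sketch with invented positions.
import numpy as np
from scipy.interpolate import interp1d

ut = np.array([0.0, 6.0, 12.0, 18.0, 24.0])   # hours
x = np.array([1.0, 1.4, 1.9, 2.1, 2.0])       # toy spacecraft coordinate

fx = interp1d(ut, x, kind='cubic', bounds_error=False, fill_value='extrapolate')
out_ut = np.array([-0.5, 3.0, 23.0, 24.5])    # includes points outside [0, 24]
print(fx(out_ut))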
def test_record_scalar_setitem(self):
    # https://github.com/numpy/numpy/issues/3561
    rec = np.recarray(1, dtype=[('x', float, 5)])
    rec[0].x = 1
    assert_equal(rec[0].x, np.ones(5))
def get_tree(proto_tree, N, vocabulary, L, params): ''' ''' dtype = [('node_idx', int), ('pair', int), ('side', '|S1'), ('parent', int), ('child_left', int), ('child_right', int), ('depth', int), ('leaf', bool), ('t', np.object), ('t_gpu', np.object), ('y_gpu', np.object), ('x_gpu', np.object), ('d_gpu', np.object), ('ds_gpu', np.object), ('d2s_gpu', np.object), ('word_idx', int), ('word', 'U27')] tree = np.recarray(N, dtype=dtype) tree[:] = -1 tree['pair'] = -1 children_stack = [] node_idx = 0 side = '' depth = 1 #-------------- Add data to tree -----------------# def add_data(wrd, leaf, child_l=None, child_r=None): tree[node_idx]['node_idx'] = node_idx tree[node_idx]['side'] = side tree[node_idx]['t'] = np.zeros(params['C'], dtype=np.float64) tree[node_idx]['t'][wrd[0]] = 1.0 tree[node_idx]['depth'] = depth tree[node_idx]['leaf'] = leaf if leaf: tree[node_idx]['word'] = wrd[1] tree[node_idx]['word_idx'] = vocabulary.index(wrd[1]) x_gpu = gpuarray.to_gpu(L[tree[node_idx]['word_idx'], :]) tree[node_idx]['x_gpu'] = x_gpu else: tree[node_idx]['word'] = '' tree[node_idx].child_right = child_r tree[node_idx].child_left = child_l # Add parent tree[child_r]['parent'] = tree[node_idx]['node_idx'] tree[child_l]['parent'] = tree[node_idx]['node_idx'] tree[node_idx]['x_gpu'] = gpuarray.empty(params['w_d'], np.float64) tree[node_idx]['d_gpu'] = gpuarray.empty(params['w_d'], np.float64) tree[node_idx]['ds_gpu'] = gpuarray.empty(params['w_d'], np.float64) tree[node_idx]['d2s_gpu'] = gpuarray.empty(params['C'], np.float64) tree[node_idx]['y_gpu'] = gpuarray.empty(params['C'], np.float64) tree[node_idx]['t_gpu'] = gpuarray.to_gpu(tree[node_idx]['t']) # All pairs at current depth pairs = tree['pair'][tree['depth'] == depth] # Max is last pair if pairs.max() == -1: # No pairs at this depth tree[node_idx]['pair'] = 1 # Start new pair elif (pairs > -1).sum() % 2: # Odd number -> complete the last pair tree[node_idx]['pair'] = pairs.max() else: # Left half of new pair tree[node_idx]['pair'] = pairs.max() + 1 #-----------------------------------------------# pos = 'proto_tree' while len(proto_tree) > 1: if len(eval(pos)) == 3: # Both branches pos += '[1]' # go down in left depth += 1 side = 'l' # left side elif len(eval(pos)) == 2: # Only right branch or leaf if type(eval(pos + '[1]')) is str: # leaf # Cut pos = pos[:-3] wrd = eval(pos).pop(1) add_data(wrd, True) # Put node_idx in the children_stack, but don't pop since its # a leaf node, ie no children children_stack.append(node_idx) node_idx += 1 # Climb up depth -= 1 else: # right branch pos += '[1]' # go down in right depth += 1 side = 'r' # right side elif len(eval(pos)) == 1: # Branch node, w both children cut # Cut pos = pos[:-3] wrd = eval(pos).pop(1) # left OR right side in a pair? if len(eval(pos)) == 2: # if parent len is 2 -> left side = 'l' # left side elif len(eval(pos)) == 1: # if parent len is 1 -> right side = 'r' # right side # Pop children child_r = children_stack.pop() child_l = children_stack.pop() add_data(wrd, False, child_l=child_l, child_r=child_r) # Put node_idx in the children_stack children_stack.append(node_idx) node_idx += 1 # Climb up depth -= 1 side = '' child_r = children_stack.pop() child_l = children_stack.pop() add_data(proto_tree, False, child_l=child_l, child_r=child_r) tree[node_idx]['parent'] = -1 # fix -1/last element mix up. return tree
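# get_tree stores per-node buffers in object-dtype recarray columns. A minimal sketch of
# that pattern with plain NumPy arrays standing in for the gpuarray buffers (pycuda is not
# assumed here); the field names are reduced to a representative subset.
import numpy as np

dtype = [('node_idx', int), ('leaf', bool), ('word', 'U27'), ('x', object)]
tree = np.recarray(3, dtype=dtype)

for i, word in enumerate(['the', 'cat', '']):
    tree[i]['node_idx'] = i
    tree[i]['leaf'] = bool(word)      # leaf nodes carry a word, internal nodes do not
    tree[i]['word'] = word
    tree[i]['x'] = np.zeros(4)        # an object column can hold an arbitrary per-node array

print(tree.word)
print(tree[0].x)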
    if chandra_type[i] == 'GALAXY':
        rad = float(0.9 * 16.6 + (1.08 * z))
        chandra_r.append(rad)
        chandra_t.append('!GALAXY')
    else:
        rad = float(0.5 * 16.6 + (1.08 * z))
        chandra_r.append(rad)
        chandra_t.append('!AGN')

for i in range(0, num):
    radius = chandra_r[i] / 0.05
    r[i] = radius

data2 = np.recarray((num, ),
                    dtype=(numpy.record, [('TYPE', 'S16'), ('X', '>f4', (4, )),
                                          ('Y', '>f4', (4, )), ('R', '>f4', (4, )),
                                          ('FLUX', '>f4', (1, )), ('ROTANG', '>f4', (4, ))]))

for i in range(0, num):
    data2[i][0] = chandra_t[i]
    data2[i][1] = (chandra_ra[i], 0, 0, 0)
    data2[i][2] = (chandra_dec[i], 0, 0, 0)
    data2[i][3] = (r[i], 0, 0, 0)
    data2[i][4] = chandra_flux[i]
    data2[i][5] = (0., 0, 0, 0)

file = "pnS005-bkg_region-radec.fits"
hdu = pyfits.open(file)
data = hdu[1].data
nhdu1 = pyfits.PrimaryHDU()
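# Sketch of the region-list pattern in the fragment above: a recarray whose fields are
# fixed-length sub-arrays can be filled one whole record at a time with a tuple. Field
# names and shapes follow the fragment; the coordinate values are invented.
import numpy as np

dtype = [('TYPE', 'S16'), ('X', '>f4', (4, )), ('Y', '>f4', (4, )), ('R', '>f4', (4, ))]
regions = np.recarray((2, ), dtype=dtype)

regions[0] = (b'!GALAXY', (150.1, 0, 0, 0), (2.2, 0, 0, 0), (18.3, 0, 0, 0))
regions[1] = (b'!AGN',    (150.4, 0, 0, 0), (2.3, 0, 0, 0), (9.1, 0, 0, 0))

print(regions.TYPE, regions.R[:, 0])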
class DataChopper(PropertiedObject, BaseFilter): """ EventDataChopper converts continuous time series of entire session into chunks based on the events specification In other words you may read entire eeg session first and then using EventDataChopper divide it into chunks corresponding to events of your choice """ _descriptors = [ TypeValTuple('start_time', float, 0.0), TypeValTuple('end_time', float, 0.0), TypeValTuple('buffer_time', float, 0.0), TypeValTuple('events', np.recarray, np.recarray((1, ), dtype=[('x', int)])), TypeValTuple('start_offsets', np.ndarray, np.array([], dtype=int)), TypeValTuple('session_data', TimeSeriesX, TimeSeriesX([0.0], dict(samplerate=1.), dims=['time'])), ] def __init__(self, **kwds): """ Constructor: :param kwds:allowed values are: ------------------------------------- :param start_time {float} - read start offset in seconds w.r.t to the eegeffset specified in the events recarray :param end_time {float} - read end offset in seconds w.r.t to the eegeffset specified in the events recarray :param end_time {float} - extra buffer in seconds (subtracted from start read and added to end read) :param events {np.recarray} - numpy recarray representing events :param startoffsets {np.ndarray} - numpy array with offsets at which chopping should take place :param session_datar {str} - TimeSeriesX object with eeg session data :return: None """ self.init_attrs(kwds) def get_event_chunk_size_and_start_point_shift(self, eegoffset, samplerate, offset_time_array): """ Computes number of time points for each event and read offset w.r.t. event's eegoffset :param ev: record representing single event :param samplerate: samplerate fo the time series :param offset_time_array: "offsets" axis of the DataArray returned by EEGReader. This is the axis that represents time axis but instead of beind dimensioned to seconds it simply represents position of a given data point in a series The time axis is constructed by dividint offsets axis by the samplerate :return: event's read chunk size {int}, read offset w.r.t. to event's eegoffset {} """ # figuring out read size chunk and shift w.r.t to eegoffset. 
We need this fcn in case we pass resampled session data original_samplerate = float( (offset_time_array[-1] - offset_time_array[0])) / offset_time_array.shape[0] * samplerate start_point = eegoffset - int( np.ceil( (self.buffer_time - self.start_time) * original_samplerate)) end_point = eegoffset + int( np.ceil((self.end_time + self.buffer_time) * original_samplerate)) selector_array = np.where((offset_time_array >= start_point) & (offset_time_array < end_point))[0] start_point_shift = selector_array[0] - np.where( (offset_time_array >= eegoffset))[0][0] return len(selector_array), start_point_shift def filter(self): """ Chops session into chunks corresponding to events :return: timeSeriesX object with chopped session """ chop_on_start_offsets_flag = bool(len(self.start_offsets)) if chop_on_start_offsets_flag: start_offsets = self.start_offsets chopping_axis_name = 'start_offsets' chopping_axis_data = start_offsets else: evs = self.events[self.events.eegfile == self.session_data.attrs['dataroot']] start_offsets = evs.eegoffset chopping_axis_name = 'events' chopping_axis_data = evs # samplerate = self.session_data.attrs['samplerate'] samplerate = float(self.session_data['samplerate']) offset_time_array = self.session_data['offsets'] event_chunk_size, start_point_shift = self.get_event_chunk_size_and_start_point_shift( eegoffset=start_offsets[0], samplerate=samplerate, offset_time_array=offset_time_array) event_time_axis = np.arange(event_chunk_size) * (1.0 / samplerate) + ( self.start_time - self.buffer_time) data_list = [] for i, eegoffset in enumerate(start_offsets): start_chop_pos = np.where(offset_time_array >= eegoffset)[0][0] start_chop_pos += start_point_shift selector_array = np.arange(start=start_chop_pos, stop=start_chop_pos + event_chunk_size) chopped_data_array = self.session_data.isel(time=selector_array) chopped_data_array['time'] = event_time_axis chopped_data_array['start_offsets'] = [i] data_list.append(chopped_data_array) ev_concat_data = xr.concat(data_list, dim='start_offsets') ev_concat_data = ev_concat_data.rename( {'start_offsets': chopping_axis_name}) ev_concat_data[chopping_axis_name] = chopping_axis_data attrs = { "start_time": self.start_time, "end_time": self.end_time, "buffer_time": self.buffer_time } ev_concat_data['samplerate'] = samplerate return TimeSeriesX.create(ev_concat_data, samplerate, attrs=attrs)
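# Sketch of the chunking arithmetic in DataChopper.filter, simplified: locate the sample
# index of each event's eegoffset on the offsets axis, step back by the buffer, and take a
# fixed-length window. The signal, timings and offsets below are invented.
import numpy as np

samplerate = 10.0                           # Hz
offsets = np.arange(0, 1000)                # sample counter of the continuous recording
signal = np.sin(offsets / 50.0)

start_time, end_time, buffer_time = -0.5, 1.0, 0.2
event_offsets = np.array([200, 430, 770])   # eegoffset of each event

chunk_size = int(np.ceil((end_time - start_time + 2 * buffer_time) * samplerate))
shift = int(np.ceil((buffer_time - start_time) * samplerate))
chunks = []
for eegoffset in event_offsets:
    start = np.where(offsets >= eegoffset)[0][0] - shift
    chunks.append(signal[np.arange(start, start + chunk_size)])

print(np.array(chunks).shape)               # (n_events, chunk_size)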
def extract_data(pattern, query, headers, ctypes=None, fname=''): """ Extract data from CSV files whose name matches pattern. Every record in a given file is checked if it satisfies the query condition(s). If the query condition(s) are satisfied, data from the columns specified by headers are extracted from that record. Collected records are returned in a numpy record array and, if a filename fname is specified, they are also written to fn in tab-separated CSV format. If no matching records are found an empty record array of type bool is returned. argument: comment: pattern a file name pattern for files from which records are to be extracted query conditions in the form of a dictionary (list of key-value pairs) that need to be fulfilled for a record to be extracted; the query dictionary is specified in one of these forms: dict(k1=v1, k2=v2, k3=v3, ...) dict({"k1":v1, "k2":v2, "k3":v3, ...}) {"k1":v1, "k2":v2, "k3":v3, ...} headers a list of strings specifying the column headers for the columns which are to be extracted ctypes if not None, is a dictionary mapping column number or munged column name to a converter function; the column type converter dictionary can be specified as: {"k1":t1, "k2":t2, "k3":t3, ...} where the t can be, e.g., str, int, float, bool. fname if defined, the name of the CSV file (tab-separated) to which extracted records are written """ # get all file names that match pattern infiles = glob.glob(pattern) infiles.sort() # determine the query and header keys (in lowercase because csv2rec # lowercases headers), and query values qkeys = query.keys() qlckeys = [x.lower() for x in qkeys] qvalues = query.values() hlckeys = [x.lower() for x in headers] if ctypes: ctypes_lc = dict( (key.lower(), value) for (key, value) in ctypes.items()) else: ctypes_lc = None mkeys = set(qlckeys) mkeys = mkeys.union(hlckeys) mrows = [] # check files for query patterns for f in infiles: d = mlab.csv2rec(f, delimiter='\t', converterd=ctypes_lc) # check if the data contain the necessary columns if mkeys <= set(d.dtype.names): # find the records that match the query darray = mlab.rec_keep_fields(d, qlckeys) imatch = np.array([False] * darray.size) for i in range(darray.size): if list(darray[i]) == qvalues: imatch[i] = True # get data from records that matched the query if any(imatch): marray = mlab.rec_keep_fields(d, hlckeys)[imatch] for row in marray: mrows.append(row.tolist()) # write data from matching records to file if requested and return results if mrows: # The following does not work because the mlab.csv2rec() converterd # data type specifications are different from the # np.core.records.fromrecords() dtype data type specifications ... #results = np.core.records.fromrecords(mrows, dtype=ctypes_lc) # ... so, for now we cross our fingers and hope that # np.core.records.fromrecords() intuits the data types correctly, which # it seems to do (most of the time) results = np.core.records.fromrecords(mrows, names=headers) else: dt = [(h, bool) for h in headers] results = np.recarray(0, dtype=dt) if fname != '': mlab.rec2csv(results, fname, delimiter='\t') return results
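# Sketch of the two return paths of extract_data: matching rows are packed into a record
# array with np.core.records.fromrecords(..., names=...), and an empty bool-typed recarray
# is returned when nothing matched. The headers and rows below are invented.
import numpy as np

headers = ['subject', 'trial', 'rt']
mrows = [('s01', 1, 0.42), ('s01', 2, 0.39)]

if mrows:
    results = np.core.records.fromrecords(mrows, names=headers)
else:
    results = np.recarray(0, dtype=[(h, bool) for h in headers])

print(results.rt.mean() if results.size else 'no matching records')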
def test_matching(self): ''' tests keypoints matching kernel ''' image = scipy.misc.lena().astype(numpy.float32) try: import feature except: logger.error( "WARNING: feature module is not available to compare results with C++ implementation. Matching cannot be tested." ) feature = None if (feature != None): #get the struct keypoints : (x,y,s,angle,[descriptors]) sc = feature.SiftAlignment() ref_sift = sc.sift(image) ref_sift_2 = numpy.recarray((ref_sift.shape), dtype=ref_sift.dtype) ref_sift_2[:] = (ref_sift[::-1]) t0_matching = time.time() siftmatch = feature.sift_match(ref_sift, ref_sift_2) t1_matching = time.time() ref = ref_sift.desc if (USE_CPU): wg = 1, else: wg = 64, shape = ref_sift.shape[0] * wg[0], ratio_th = numpy.float32(0.5329) #sift.cpp : 0.73*0.73 keypoints_start, keypoints_end = 0, min(ref_sift.shape[0], ref_sift_2.shape[0]) gpu_keypoints1 = pyopencl.array.to_device(queue, ref_sift) gpu_keypoints2 = pyopencl.array.to_device(queue, ref_sift_2) gpu_matchings = pyopencl.array.zeros( queue, (keypoints_end - keypoints_start, 2), dtype=numpy.int32, order="C") keypoints_start, keypoints_end = numpy.int32( keypoints_start), numpy.int32(keypoints_end) nb_keypoints = numpy.int32(10000) counter = pyopencl.array.zeros(queue, (1, 1), dtype=numpy.int32, order="C") t0 = time.time() k1 = self.program.matching(queue, shape, wg, gpu_keypoints1.data, gpu_keypoints2.data, gpu_matchings.data, counter.data, nb_keypoints, ratio_th, keypoints_end, keypoints_end) res = gpu_matchings.get() cnt = counter.get() t1 = time.time() # ref_python, nb_match = my_matching(kp1, kp2, keypoints_start, keypoints_end) t2 = time.time() res_sort = res[numpy.argsort(res[:, 1])] # ref_sort = ref[numpy.argsort(ref[:,1])] print res[0:20] print "" # print ref_sort[0:20] print("C++ Matching took %.3f ms" % (1000.0 * (t1_matching - t0_matching))) print("OpenCL: %d match / C++ : %d match" % (cnt, siftmatch.shape[0])) #sort to compare added keypoints ''' delta = abs(res_sort-ref_sort).max() self.assert_(delta == 0, "delta=%s" % (delta)) #integers logger.info("delta=%s" % delta) ''' if PROFILE: logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." % (1000.0 * (t2 - t1), 1000.0 * (t1 - t0))) logger.info("Matching took %.3fms" % (1e-6 * (k1.profile.end - k1.profile.start)))
def load_data(self): """ Load the data from the delirium file and run some checks on it. The checks are enabled through the data_check dictionary attribute: 'complete' : check that all the data is there 'sample' : check that the data is well sampled 'nan' : check for NaN values 'glitches' : check for glitches """ ## load the raw data self.load_header() if self.f: self.log.notice("Loading raw data of '%s'" % self.filename, 3) print("self.f", self.f) data = np.loadtxt(self.f, comments="%", dtype=self.params.get_dtypes()) else: N = int((self.opl_max - self.opl_min) / self.opl_sampling + 1) data = np.zeros((N, ), dtype=self.params.get_dtypes()) data['opl'] = np.linspace(self.opl_min, self.opl_max, N) self.data = data self.log.notice("Fake Delirium created with empty values", 3) return if not len(data): self.log.error("data is empty") raise DataError("data is empty") ## set the reverse flag for future hysteresis reference self.reverse = data["opl"][0] > data["opl"][-1] ## now sort the table by opl data.sort(order="opl") opl = data["opl"] opl_sampling = self.opl_sampling ## complete the data to the nominal range ## so that the table always has the expected size if self.data_check["complete"]: self.log.notice("Check data completeness", 3) first = self.opl2index(opl[0]) addrowfirst = 0 addrowend = 0 if first > 0: self.log.warning( "Missing data at beginning of scan, data starts at OPL =%.2f" % opl[0]) addrowfirst += first last = self.opl2index(opl[-1]) ## the maximum index from the opl_max distance imax = self.opl2index(self.opl_max) if last < imax: self.log.warning( "Missing data at end of scan, data ends at OPL =%.2f" % opl[-1]) addrowend += int(imax - last) if addrowfirst + addrowend: newdata = np.recarray( (data.shape[0] + addrowfirst + addrowend, ), dtype=data.dtype) if addrowend: newdata[addrowfirst:-addrowend] = data else: newdata[addrowfirst:] = data newdata[0:addrowfirst] = data[0] newdata["opl"][0:addrowfirst] = self.index2opl( np.arange(first)) if addrowend: newdata[-addrowend:] = data[-1] newdata["opl"][-addrowend:] = self.index2opl( np.arange(last, imax)) data = newdata # Check that the data are sampled at 0.375 m ± 3 cm OPL intervals. if self.data_check["sample"]: self.log.notice("Check data sampling", 3) diff = data["opl"][1:] - data["opl"][:-1] if (abs(data["opl"][0] - self.opl_min) > self.opl_tol): self.log.error('Incorrect range: min = %f m' % (data["opl"][0])) raise DataError('Incorrect range: min = %f m' % (data["opl"][0])) if (abs(data["opl"][-1] - self.opl_max) > self.opl_tol): self.log.error('Incorrect range: max = %f m' % (data["opl"][-1])) raise DataError('Incorrect range: max = %f m' % (data["opl"][-1])) if ((abs(diff) - self.opl_sampling) > self.opl_tol).any(): self.log.error( 'Incorrect sampling, some intervals do not follow %.3f+-%.3f ' % (self.opl_sampling, self.opl_tol)) raise DataError( 'Incorrect sampling, some intervals do not follow %.3f+-%.3f ' % (self.opl_sampling, self.opl_tol)) mask = np.zeros(data.shape, dtype=bool) if self.data_check["nan"]: opl = data['opl'] self.log.notice("Check NaN values", 3) for key in ["doms", "incl", "yctr", "zctr", "yend", "zend"]: v = data[key] Nv = len(v) test = np.isnan(v) ou = np.where(test)[0] Nou = len(ou) if not Nou: continue self.log.notice( "Found %d NaN values for parameter %s" % (Nou, key), 1) for iou, i in enumerate(ou): if (iou + 1) < Nou and ou[iou + 1] == i + 1: self.log.error( "At least two consecutive NaN values. Cannot fix delirium data" ) raise DataError( "At least two consecutive NaN values. Cannot fix delirium data" ) if i == 0: v[i] = v[i + 1] elif i == (Nv - 1): v[i] = v[i - 1] else: v[i] = np.interp(opl[i], opl[[i - 1, i + 1]], v[[i - 1, i + 1]]) #mask += np.isnan(data[key]) if self.data_check["glitches"]: self.log.notice("Check glitches", 3) for key in ["incl", "yctr", "zctr", "yend", "zend"]: mask += self.filter_gliches(data[key]) # :TODO: check what to do with invalid data # in matlab, invalid FOGALE data are replaced by extrapolation using a second order fit of self.data = data ## close the file if self.f: self.f.close()
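The completeness check above pads the table to its nominal OPL range by allocating a larger recarray, cloning the edge rows, and rewriting their 'opl' values. A minimal sketch of that padding pattern with assumed numbers (0.375 m sampling, a made-up field set):

import numpy as np

dtype = [('opl', 'f8'), ('yctr', 'f8')]
data = np.zeros(4, dtype=dtype).view(np.recarray)
data.opl = np.array([1.5, 1.875, 2.25, 2.625])   # scan starts late and stops early
data.yctr = np.arange(4, dtype=float)

sampling, opl_min, opl_max = 0.375, 0.75, 3.0
add_first = int(round((data.opl[0] - opl_min) / sampling))   # 2 missing rows at the start
add_end = int(round((opl_max - data.opl[-1]) / sampling))    # 1 missing row at the end

newdata = np.recarray((len(data) + add_first + add_end,), dtype=data.dtype)
newdata[add_first:len(newdata) - add_end] = data
newdata[:add_first] = data[0]                                 # clone the first row
newdata.opl[:add_first] = opl_min + sampling * np.arange(add_first)
newdata[len(newdata) - add_end:] = data[-1]                   # clone the last row
newdata.opl[len(newdata) - add_end:] = data.opl[-1] + sampling * np.arange(1, add_end + 1)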
def writeObs(self, objId, interpfuncs, simdata, idxObs, outfileName='out.txt', sedname='C.dat', seeingCol='FWHMgeom', expMJDCol='expMJD', expTimeCol='visitExpTime'): """ Called once per object; write out the observations of that object. """ # Return if there's nothing to write out. if len(idxObs) == 0: return # Open file if needed. try: self.outfile except AttributeError: self._openOutput(outfileName) # Calculate the ephemerides for the object, using the interpfuncs, for the times in simdata[idxObs]. tvis = simdata[expMJDCol][idxObs] ephs = np.recarray([len(tvis)], dtype=([('delta', '<f8'), ('ra', '<f8'), ('dec', '<f8'), ('magV', '<f8'), ('time', '<f8'), ('dradt', '<f8'), ('ddecdt', '<f8'), ('phase', '<f8'), ('solarelon', '<f8'), ('velocity', '<f8')])) for n in interpfuncs: ephs[n] = interpfuncs[n](tvis) ephs['time'] = tvis # Calculate the extra columns we want to write out # (dmag due to color, trailing loss, and detection loss) # First calculate and match the color dmag term. dmagColor = np.zeros(len(idxObs), float) dmagColorDict = self._calcColors(sedname) filterlist = np.unique(simdata[idxObs]['filter']) for f in filterlist: if f not in dmagColorDict: raise UserWarning( 'Could not find filter %s in calculated colors!' % (f)) match = np.where(simdata[idxObs]['filter'] == f)[0] dmagColor[match] = dmagColorDict[f] magFilter = ephs['magV'] + dmagColor # Calculate trailing and detection losses. dmagTrail, dmagDetect = self._calcMagLosses( ephs['velocity'], simdata[seeingCol][idxObs], simdata[expTimeCol][idxObs]) # Turn into a recarray so it's easier below. dmags = np.rec.fromarrays( [magFilter, dmagColor, dmagTrail, dmagDetect], names=['magFilter', 'dmagColor', 'dmagTrail', 'dmagDetect']) outCols = [ 'objId', ] + list(ephs.dtype.names) + list(simdata.dtype.names) + list( dmags.dtype.names) if not self.wroteHeader: writestring = '' for col in outCols: writestring += '%s ' % (col) self.outfile.write('%s\n' % (writestring)) self.wroteHeader = True # Write results. for eph, simdat, dm in zip(ephs, simdata[idxObs], dmags): writestring = '%s ' % (objId) for col in ephs.dtype.names: writestring += '%s ' % (eph[col]) for col in simdat.dtype.names: writestring += '%s ' % (simdat[col]) for col in dm.dtype.names: writestring += '%s ' % (dm[col]) self.outfile.write('%s\n' % (writestring)) self.outfile.flush()
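Two recarray idioms are at work above: filling one field per interpolation function, and bundling derived columns with np.rec.fromarrays so rows can be iterated conveniently. A small self-contained sketch of both patterns (the column names and the "interpolators" are placeholders, not the real ephemeris quantities):

import numpy as np

tvis = np.linspace(0.0, 1.0, 5)
interpfuncs = {'ra': np.cos, 'dec': np.sin}          # stand-ins for real interpolators

ephs = np.recarray(len(tvis), dtype=[('ra', '<f8'), ('dec', '<f8'), ('time', '<f8')])
for name in interpfuncs:
    ephs[name] = interpfuncs[name](tvis)
ephs['time'] = tvis

# Bundle extra derived columns into a second recarray for row-wise access
dmags = np.rec.fromarrays([ephs['ra'] * 0.1, ephs['dec'] * 0.2],
                          names=['dmagTrail', 'dmagDetect'])
for eph, dm in zip(ephs, dmags):
    print(eph['time'], dm['dmagTrail'])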
def keypoints(self, image, mask=None): """Calculates the keypoints of the image TODO: use a temporary list with events and use a single test at the end :param image: 2D ndarray (or 3D if RGB) :param mask: TODO: implement a mask for sieving out the keypoints :return: vector of keypoints (1D numpy recarray) """ # self.reset_timer() with self.sem: total_size = 0 keypoints = [] descriptors = [] assert image.shape[:2] == self.shape assert image.dtype in [self.dtype, numpy.float32] # old versions of pyopencl do not check for data contiguity if not (isinstance(image, pyopencl.array.Array)) and not ( image.flags["C_CONTIGUOUS"]): image = numpy.ascontiguousarray(image) t0 = time.time() if image.dtype == numpy.float32: if isinstance(image, pyopencl.array.Array): evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["scale_0"].data, image.data) else: evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["scale_0"].data, image) if self.profile: self.events.append(("copy H->D", evt)) elif self.dtype == numpy.float64: # A preprocessing kernel double_to_float exists, but is commented (RUNS ONLY ON GPU WITH FP64) # TODO: benchmark this kernel vs the current pure CPU format conversion with numpy.float32 # and uncomment it if it proves faster (dubious, because of data transfer bottleneck) evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["scale_0"].data, image.astype(numpy.float32)) if self.profile: self.events.append(("copy H->D", evt)) elif (len(image.shape) == 3) and (image.dtype == numpy.uint8) and (self.RGB): if isinstance(image, pyopencl.array.Array): evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["raw"].data, image.data) else: evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["raw"].data, image) if self.profile: self.events.append(("copy H->D", evt)) evt = self.kernels.get_kernel("rgb_to_float")( self.queue, self.procsize[0], self.wgsize[0], self.cl_mem["raw"].data, self.cl_mem["scale_0"].data, *self.scales[0]) if self.profile: self.events.append(("RGB -> float", evt)) elif self.dtype in self.converter: program = self.kernels.get_kernel(self.converter[self.dtype]) evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["raw"].data, image) if self.profile: self.events.append(("copy H->D", evt)) evt = program(self.queue, self.procsize[0], self.wgsize[0], self.cl_mem["raw"].data, self.cl_mem["scale_0"].data, *self.scales[0]) if self.profile: self.events.append(("convert -> float", evt)) else: raise RuntimeError("invalid input format error (%s)" % (str(self.dtype))) wg1 = self.kernels_wg["max_min_global_stage1"] wg2 = self.kernels_wg["max_min_global_stage2"] if min(wg1, wg2) < self.red_size: # common bug on OSX when running on CPU logger.info( "Unable to use MinMax Reduction: stage1 wg: %s; stage2 wg: %s < max_work_group_size: %s, expected: %s", wg1, wg2, self.block_size, self.red_size) kernel = self.kernels.get_kernel("max_min_vec16") k = kernel(self.queue, (1, ), (1, ), self.cl_mem["scale_0"].data, numpy.int32(self.shape[0] * self.shape[1]), self.cl_mem["max"].data, self.cl_mem["min"].data) if self.profile: self.events.append(("max_min_serial", k)) # python implementation: # buffer_ = self.cl_mem["scale_0"].get() # self.cl_mem["max"].set(numpy.array([buffer_.max()], dtype=numpy.float32)) # self.cl_mem["min"].set(numpy.array([buffer_.min()], dtype=numpy.float32)) else: kernel1 = self.kernels.get_kernel("max_min_global_stage1") kernel2 = self.kernels.get_kernel("max_min_global_stage2") # logger.debug("self.red_size: %s", self.red_size) shm = pyopencl.LocalMemory(self.red_size * 2 * 4) k1 = kernel1(self.queue, (self.red_size * self.red_size, ), (self.red_size, ), self.cl_mem["scale_0"].data, self.cl_mem["max_min"].data, numpy.int32(self.shape[0] * self.shape[1]), shm) k2 = kernel2(self.queue, (self.red_size, ), (self.red_size, ), self.cl_mem["max_min"].data, self.cl_mem["max"].data, self.cl_mem["min"].data, shm) if self.profile: self.events.append(("max_min_stage1", k1)) self.events.append(("max_min_stage2", k2)) evt = self.kernels.get_kernel("normalizes")( self.queue, self.procsize[0], self.wgsize[0], self.cl_mem["scale_0"].data, self.cl_mem["min"].data, self.cl_mem["max"].data, self.cl_mem["255"].data, *self.scales[0]) if self.profile: self.events.append(("normalize", evt)) curSigma = 1.0 if par.DoubleImSize else 0.5 octave = 0 if self._init_sigma > curSigma: logger.debug("Blurring image to achieve std: %f", self._init_sigma) sigma = math.sqrt(self._init_sigma**2 - curSigma**2) self._gaussian_convolution(self.cl_mem["scale_0"], self.cl_mem["scale_0"], sigma, 0) for octave in range(self.octave_max): kp, descriptor = self._one_octave(octave) logger.info("in octave %i found %i kp" % (octave, kp.shape[0])) if len(kp): # sieve out coordinates with NaNs mask = numpy.where( numpy.logical_not(numpy.isnan(kp.sum(axis=-1)))) keypoints.append(kp[mask]) descriptors.append(descriptor[mask]) total_size += len(mask[0]) ######################################################################## # Merge keypoints in central memory ######################################################################## output = numpy.recarray(shape=(total_size, ), dtype=self.dtype_kp) last = 0 for ds, desc in zip(keypoints, descriptors): l = ds.shape[0] if l > 0: output[last:last + l].x = ds[:, 0] output[last:last + l].y = ds[:, 1] output[last:last + l].scale = ds[:, 2] output[last:last + l].angle = ds[:, 3] output[last:last + l].desc = desc last += l logger.info("Execution time: %.3fms" % (1000 * (time.time() - t0))) return output
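The merge step above sieves out NaN keypoints per octave, then copies coordinate columns and descriptors field by field into one output recarray. A condensed sketch of that pattern with fabricated per-octave results (the dtype is a plausible stand-in for self.dtype_kp):

import numpy

dtype_kp = numpy.dtype([('x', '<f4'), ('y', '<f4'), ('scale', '<f4'),
                        ('angle', '<f4'), ('desc', '<u1', (128,))])

# Fabricated per-octave results: (N, 4) keypoints and (N, 128) descriptors
kp_oct = [numpy.ones((3, 4), dtype=numpy.float32), 2 * numpy.ones((2, 4), dtype=numpy.float32)]
desc_oct = [numpy.zeros((3, 128), dtype=numpy.uint8), numpy.ones((2, 128), dtype=numpy.uint8)]
kp_oct[0][1] = numpy.nan                     # one keypoint to be sieved out

keypoints, descriptors, total_size = [], [], 0
for kp, desc in zip(kp_oct, desc_oct):
    mask = numpy.where(numpy.logical_not(numpy.isnan(kp.sum(axis=-1))))
    keypoints.append(kp[mask])
    descriptors.append(desc[mask])
    total_size += len(mask[0])

output = numpy.recarray(shape=(total_size,), dtype=dtype_kp)
last = 0
for ds, desc in zip(keypoints, descriptors):
    n = ds.shape[0]
    output[last:last + n].x = ds[:, 0]
    output[last:last + n].y = ds[:, 1]
    output[last:last + n].scale = ds[:, 2]
    output[last:last + n].angle = ds[:, 3]
    output[last:last + n].desc = desc
    last += n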
def read_data(args, work, limit_bands=None, prefix='piff'): import fitsio RESERVED = 64 BAD_CCDS = [2, 31, 61] if args.file != '': print('Read file ',args.file) with open(args.file) as fin: exps = [ line.strip() for line in fin if line[0] != '#' ] print('File included %d exposures'%len(exps)) else: exps = args.exps print('Explicit listing of %d exposures'%len(exps)) exps = sorted(exps) keys = ['ra', 'dec', 'x', 'y', 'mag', 'obs_e1', 'obs_e2', 'obs_T', prefix+'_e1', prefix+'_e2', prefix+'_T'] all_data = { key : [] for key in keys } all_keys = keys all_data['exp'] = [] all_data['ccd'] = [] all_keys = all_keys + ['exp', 'ccd' ] if 'x' in keys: all_data['fov_x'] = [] all_data['fov_y'] = [] all_keys = all_keys + ['fov_x', 'fov_y'] inkeys = keys all_bands = [] # This keeps track of the band for each record #all_tilings = [] # This keeps track of the tiling for each record bands = set() # This is the set of all bands being used #tilings = set() # This is the set of all tilings being used for exp in exps: print('Start work on exp = ',exp) expnum = int(exp) print('expnum = ',expnum) expinfo = fitsio.read(os.path.join(work, exp, 'exp_info_%d.fits'%expnum)) if expnum not in expinfo['expnum']: print('expnum is not in expinfo!') print('expinfo[expnum] = ',expinfo['expnum']) print('Could not find information about this expnum. Skipping ',exp) continue i = np.nonzero(expinfo['expnum'] == expnum)[0][0] #print('i = ',i) band = expinfo['band'][i] #print('band[k] = ',band) if (limit_bands is not None) and (band not in limit_bands): print('Not doing band = %s.'%band) continue #tiling = int(expinfo['tiling'][k]) #print('tiling[k] = ',tiling) #if tiling == 0: # This shouldn't happen, but it did for a few exposures. Just skip them, since this # might indicate some kind of problem. #print('tiling == 0. Skip this exposure.') #continue #if tiling > args.max_tiling: #print('tiling is > %d. Skip this exposure.'%args.max_tiling) #continue for k in range(len(expinfo)): ccdnum = expinfo[k]['ccdnum'] if expinfo[k]['flag'] != 0: print('Skipping ccd %d because it is blacklisted: '%ccdnum, expinfo[k]['flag']) continue if ccdnum in BAD_CCDS: print('Skipping ccd %d because it is BAD'%ccdnum) continue cat_file = os.path.join(work, exp, "psf_cat_%d_%d.fits"%(expnum,ccdnum)) #print('cat_file = ',cat_file) try: data = fitsio.read(cat_file) flag = data[prefix+'_flag'] except (OSError, IOError): print('Unable to open cat_file %s. Skipping this file.'%cat_file) continue ntot = len(data) nused = np.sum((flag & 1) != 0) nreserved = np.sum((flag & RESERVED) != 0) ngood = np.sum(flag == 0) #print('nused = ',nused) #print('nreserved = ',nreserved) #print('ngood = ',ngood) if args.use_reserved: mask = (flag == RESERVED) | (flag == RESERVED+1) else: mask = (flag == 0) #print('mask = ',mask) T = data['obs_T'] dT = (data[prefix + '_T'] - data['obs_T']) de1 = (data[prefix + '_e1'] - data['obs_e1']) de2 = (data[prefix + '_e2'] - data['obs_e2']) used = (flag == 0) #if np.std(dT[used]/T[used]) > 0.03: #continue #if np.std(de1[used]) > 0.02: #continue #if np.std(de2[used]) > 0.02: #continue good = (abs(dT/data['obs_T']) < 0.1) & (abs(de1) < 0.1) & (abs(de2) < 0.1) mask = mask & good ngood = np.sum(mask) #print('ngood = ',ngood,'/',len(data)) assert ngood == len(data[mask]) if ngood == 0: print('All objects in ccd %d are flagged.'%ccdnum) print('Probably due to astrometry flags. Skip this exposure.') continue for key, inkey in zip(keys, inkeys): all_data[key].append(data[inkey][mask]) all_data['exp'].append([expnum] * ngood) all_data['ccd'].append([ccdnum] * ngood) if 'x' in keys: # Convert to focal position. x,y = toFocal(ccdnum, data['x'][mask], data['y'][mask]) # This comes back in units of mm. Convert to arcsec. # 1 pixel = 15e-3 mm = 0.263 arcsec x *= 0.263/15e-3 y *= 0.263/15e-3 all_data['fov_x'].append(x) all_data['fov_y'].append(y) all_bands.extend( ([band] * ngood) ) #all_tilings.extend( ([tiling] * ngood) ) bands.add(band) #tilings.add(tiling) print('\nFinished processing all exposures') print('bands = ',bands) #print('tilings = ',tilings) # Turn the data into a recarray print('all_data.keys = ',all_data.keys()) formats = ['f8'] * len(all_keys) + ['a1', 'i2'] #names = all_keys + ['band', 'tiling'] names = all_keys + ['band'] data = np.recarray(shape = (len(all_bands),), formats = formats, names = names) print('data.dtype = ',data.dtype) for key in all_keys: data[key] = np.concatenate(all_data[key]) data['band'] = all_bands #data['tiling'] = all_tilings print('made recarray') tilings = None return data, bands, tilings
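After accumulating per-exposure columns in Python lists, the function materialises them as a single recarray built with the formats/names form of the constructor. A toy version of that final step, with fabricated values and only two numeric columns:

import numpy as np

all_data = {'ra': [np.array([1.0, 2.0]), np.array([3.0])],
            'mag': [np.array([20.1, 21.2]), np.array([19.8])]}
all_bands = ['r', 'r', 'i']

keys = ['ra', 'mag']
data = np.recarray(shape=(len(all_bands),),
                   formats=['f8'] * len(keys) + ['S1'],
                   names=keys + ['band'])
for key in keys:
    data[key] = np.concatenate(all_data[key])
data['band'] = all_bands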
def unwrap_specobjid(specObjID, run2d_integer=False, specLineIndex=False): """Unwrap CAS-style specObjID into plate, fiber, mjd, run2d. See :func:`~pydl.pydlutils.sdss.sdss_specobjid` for details on how the bits within a specObjID are assigned. Parameters ---------- specObjID : :class:`numpy.ndarray` An array containing 64-bit integers or strings. If strings are passed, they will be converted to integers internally. run2d_integer : :class:`bool`, optional If ``True``, do *not* attempt to convert the encoded run2d values to a string of the form 'vN_M_P'. specLineIndex : :class:`bool`, optional If ``True`` interpret any low-order bits as being an 'index' rather than a 'line'. Returns ------- :class:`numpy.recarray` A record array with the same length as `specObjID`, with the columns 'plate', 'fiber', 'mjd', 'run2d', 'line'. Examples -------- >>> from numpy import array, uint64 >>> from pydl.pydlutils.sdss import unwrap_specobjid >>> unwrap_specobjid(array([4565636362342690816], dtype=uint64)) rec.array([(4055, 408, 55359, 'v5_7_0', 0)], dtype=[('plate', '<i4'), ('fiber', '<i4'), ('mjd', '<i4'), ('run2d', '<U8'), ('line', '<i4')]) """ if (specObjID.dtype.type is np.string_ or specObjID.dtype.type is np.unicode_): tempobjid = specObjID.astype(np.uint64) elif specObjID.dtype.type is np.uint64: tempobjid = specObjID.copy() else: raise ValueError('Unrecognized type for specObjID!') run2d_dtype = 'U8' if run2d_integer: run2d_dtype = 'i4' line = 'line' if specLineIndex: line = 'index' unwrap = np.recarray(specObjID.shape, dtype=[('plate', 'i4'), ('fiber', 'i4'), ('mjd', 'i4'), ('run2d', run2d_dtype), (line, 'i4')]) unwrap.plate = np.bitwise_and(tempobjid >> 50, 2**14 - 1) unwrap.fiber = np.bitwise_and(tempobjid >> 38, 2**12 - 1) unwrap.mjd = np.bitwise_and(tempobjid >> 24, 2**14 - 1) + 50000 run2d = np.bitwise_and(tempobjid >> 10, 2**14 - 1) if run2d_integer: unwrap.run2d = run2d else: N = ((run2d // 10000) + 5).tolist() M = ((run2d % 10000) // 100).tolist() P = (run2d % 100).tolist() unwrap.run2d = [ 'v{0:d}_{1:d}_{2:d}'.format(n, m, p) for n, m, p in zip(N, M, P) ] unwrap[line] = np.bitwise_and(tempobjid, 2**10 - 1) return unwrap
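For reference, the packing direction uses the bit layout the docstring describes: plate in bits 50-63, fiber in 38-49, mjd-50000 in 24-37, the encoded run2d in 10-23, and line/index in the low 10 bits. Below is a hypothetical wrap_specobjid helper, sketched only to illustrate that layout; the real encoder is pydl.pydlutils.sdss.sdss_specobjid.

import numpy as np

def wrap_specobjid(plate, fiber, mjd, run2d_n, run2d_m, run2d_p, line=0):
    # A tag 'vN_M_P' is encoded as (N-5)*10000 + M*100 + P, matching the unwrap above
    run2d = (run2d_n - 5) * 10000 + run2d_m * 100 + run2d_p
    return (np.uint64(plate) << np.uint64(50) |
            np.uint64(fiber) << np.uint64(38) |
            np.uint64(mjd - 50000) << np.uint64(24) |
            np.uint64(run2d) << np.uint64(10) |
            np.uint64(line))

# Round-trips the docstring example: 4055/408/55359/'v5_7_0' -> 4565636362342690816
specobjid = np.array([wrap_specobjid(4055, 408, 55359, 5, 7, 0)], dtype=np.uint64)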
def ReadMagData(Date, Minute=False, res=None, Ab=None, DetectGaps=None): ''' Reads binary magnetometer data from MESSENGER. Args: Date: 32-bit (minimum) integer with date in format yyyymmdd. Minute: If True - the routine will read minute averages of MAG data, if False, then full resolution data will be read. res: Set resample resolution in seconds for data, by default res=None - no resampling, res=0.05 for evenly spaced 20Hz sampling. Ab: Angle to aberrate X and Y components of the data by, in degrees. When set to None, the aberration angle will be found automatically. DetectGaps: Largest data gap size (in seconds) to interpolate over, if DetectGaps=None then all gaps will be interpolated over, otherwise larger gaps will be filled with NaN. Returns: np.recarray of MAG data ''' fname = '{:08d}.bin'.format(Date) if Minute: path = Globals.MessPath + 'MAG/Binary/MSO/Minute/' else: path = Globals.MessPath + 'MAG/Binary/MSO/Full/' dtype = [('Date', 'int32'), ('ut', 'float32'), ('Xmso', 'float32'), ('Ymso', 'float32'), ('Zmso', 'float32'), ('Xmsm', 'float32'), ('Ymsm', 'float32'), ('Zmsm', 'float32'), ('Bx', 'float32'), ('By', 'float32'), ('Bz', 'float32')] if not os.path.isfile(path + fname): out = np.recarray(0, dtype=dtype) return out data = RT.ReadRecarray(path + fname, dtype) if Ab is None: tmp = GetAberrationAngle(Date) Ab = tmp.Angle if Ab != 0.0: #rotate spacecraft position into aberrated coords data.Xmsm, data.Ymsm = RotTrans(data.Xmsm, data.Ymsm, Ab * np.pi / 180.0) data.Xmso, data.Ymso = RotTrans(data.Xmso, data.Ymso, Ab * np.pi / 180.0) #rotate bx,by into aberrated coordinate system data.Bx, data.By = RotTrans(data.Bx, data.By, Ab * np.pi / 180.0) if res is not None: UTo = np.array(data.ut) length = np.int32(86400 / res) newdata = np.recarray(length, dtype=dtype) ntags = np.size(data.dtype.names) newdata.ut = 24 * np.arange(length, dtype='float32') / length newdata.Date = Date for i in range(2, ntags): f = InterpolatedUnivariateSpline(data.ut, data[data.dtype.names[i]]) newdata[newdata.dtype.names[i]] = f(newdata.ut) if DetectGaps is not None: #DetectGaps is the largest gap in seconds to interpolate over (5s is used elsewhere) MaxUTGapHr = DetectGaps / 3600.0 bad = np.zeros(length, dtype='bool') for i in range(0, UTo.size - 1): if (UTo[i + 1] - UTo[i]) > MaxUTGapHr: b = np.where((newdata.ut > UTo[i]) & (newdata.ut < UTo[i + 1]))[0] bad[b] = True baddata = np.where(bad)[0] dtags = ['Bx', 'By', 'Bz'] for i in range(0, len(dtags)): if dtags[i] in data.dtype.names: newdata[dtags[i]][baddata] = np.float32(np.nan) return newdata else: return data
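The res branch above rebuilds the recarray on an even UT grid, fitting a spline through every field except Date and ut. A cut-down sketch of that resampling loop with fabricated input (only a few fields, 1-minute cadence):

import numpy as np
from scipy.interpolate import InterpolatedUnivariateSpline

dtype = [('Date', 'int32'), ('ut', 'float32'), ('Bx', 'float32'), ('By', 'float32')]
data = np.recarray(5, dtype=dtype)
data.Date = 20120101
data.ut = np.array([0.0, 6.0, 12.0, 18.0, 24.0], dtype='float32')
data.Bx = np.sin(data.ut)
data.By = np.cos(data.ut)

res = 60.0                                   # resample to a 1-minute cadence
length = np.int32(86400 / res)
newdata = np.recarray(length, dtype=dtype)
newdata.ut = 24 * np.arange(length, dtype='float32') / length
newdata.Date = data.Date[0]
for name in data.dtype.names[2:]:            # skip Date and ut
    f = InterpolatedUnivariateSpline(data.ut, data[name])
    newdata[name] = f(newdata.ut)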
df = pd.DataFrame(a, columns=['date', 'val', 'character_col']) # Convert into a numpy recarray to preserve the dtypes np_array = df.to_records(index=False) del df shape, dtype = np_array.shape, np_array.dtype print(f"np_array's size={np_array.nbytes/1e6}MB") # With shared memory # Start tracking memory usage tracemalloc.start() start_time = time.time() with SharedMemoryManager() as smm: # Create a shared memory block of size np_array.nbytes shm = smm.SharedMemory(np_array.nbytes) # Create a np.recarray using the buffer of shm shm_np_array = np.recarray(shape=shape, dtype=dtype, buf=shm.buf) # Copy the data into the shared memory np.copyto(shm_np_array, np_array) # Spawn some processes to do some work with ProcessPoolExecutor(cpu_count()) as exe: fs = [exe.submit(work_with_shared_memory, shm.name, shape, dtype) for _ in range(cpu_count())] for _ in as_completed(fs): pass # Check memory usage current, peak = tracemalloc.get_traced_memory() print(f"Current memory usage {current/1e6}MB; Peak: {peak/1e6}MB") print(f'Time elapsed: {time.time()-start_time:.2f}s') tracemalloc.stop() # Without shared memory
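work_with_shared_memory is referenced above but not shown; a plausible sketch of such a worker follows. It attaches to the existing block by name and wraps its buffer in a recarray without copying. The body (summing the 'val' column) is an assumption made for illustration, and the snippet above additionally needs pandas, numpy, time, tracemalloc, SharedMemoryManager, ProcessPoolExecutor, as_completed and cpu_count to be imported.

import numpy as np
from multiprocessing.shared_memory import SharedMemory

def work_with_shared_memory(shm_name, shape, dtype):
    # Attach to the existing block by name; this maps the same pages, no copy is made
    shm = SharedMemory(shm_name)
    np_array = np.recarray(shape=shape, dtype=dtype, buf=shm.buf)
    total = np.nansum(np_array.val)   # placeholder "work" on the shared data
    del np_array                      # drop the view before closing the mapping
    shm.close()
    return total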
def test_ichimoku_kinko_hyo(self): window_length = 52 today = pd.Timestamp('2014', tz='utc') nassets = 5 assets = pd.Index(np.arange(nassets)) days_col = np.arange(window_length)[:, np.newaxis] highs = np.arange(nassets) + 2 + days_col closes = np.arange(nassets) + 1 + days_col lows = np.arange(nassets) + days_col tenkan_sen_length = 9 kijun_sen_length = 26 chikou_span_length = 26 ichimoku_kinko_hyo = IchimokuKinkoHyo( window_length=window_length, tenkan_sen_length=tenkan_sen_length, kijun_sen_length=kijun_sen_length, chikou_span_length=chikou_span_length, ) dtype = [ ('tenkan_sen', 'f8'), ('kijun_sen', 'f8'), ('senkou_span_a', 'f8'), ('senkou_span_b', 'f8'), ('chikou_span', 'f8'), ] out = np.recarray( shape=(nassets,), dtype=dtype, buf=np.empty(shape=(nassets,), dtype=dtype), ) ichimoku_kinko_hyo.compute( today, assets, out, highs, lows, closes, tenkan_sen_length, kijun_sen_length, chikou_span_length, ) expected_tenkan_sen = np.array([ (53 + 43) / 2, (54 + 44) / 2, (55 + 45) / 2, (56 + 46) / 2, (57 + 47) / 2, ]) expected_kijun_sen = np.array([ (53 + 26) / 2, (54 + 27) / 2, (55 + 28) / 2, (56 + 29) / 2, (57 + 30) / 2, ]) expected_senkou_span_a = (expected_tenkan_sen + expected_kijun_sen) / 2 expected_senkou_span_b = np.array([ (53 + 0) / 2, (54 + 1) / 2, (55 + 2) / 2, (56 + 3) / 2, (57 + 4) / 2, ]) expected_chikou_span = np.array([ 27.0, 28.0, 29.0, 30.0, 31.0, ]) assert_equal( out.tenkan_sen, expected_tenkan_sen, msg='tenkan_sen', ) assert_equal( out.kijun_sen, expected_kijun_sen, msg='kijun_sen', ) assert_equal( out.senkou_span_a, expected_senkou_span_a, msg='senkou_span_a', ) assert_equal( out.senkou_span_b, expected_senkou_span_b, msg='senkou_span_b', ) assert_equal( out.chikou_span, expected_chikou_span, msg='chikou_span', )
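The out argument in the test above is a recarray allocated over a plain ndarray buffer, which mirrors how a multi-output factor's compute method receives its structured output block. A stripped-down sketch of that allocation and in-place fill pattern; the fake_compute function here is a simplified stand-in, not zipline's implementation:

import numpy as np

dtype = [('tenkan_sen', 'f8'), ('kijun_sen', 'f8')]
nassets = 3

# Output recarray backed by a plain ndarray buffer, as in the test above
out = np.recarray(shape=(nassets,), dtype=dtype,
                  buf=np.empty(shape=(nassets,), dtype=dtype))

def fake_compute(highs, lows, out):
    # Fill the named fields of the output block in place
    out.tenkan_sen = (highs[-9:].max(axis=0) + lows[-9:].min(axis=0)) / 2
    out.kijun_sen = (highs[-26:].max(axis=0) + lows[-26:].min(axis=0)) / 2

days = np.arange(52)[:, np.newaxis]
fake_compute(np.arange(nassets) + 2 + days, np.arange(nassets) + days, out)
print(out.tenkan_sen)   # [48. 49. 50.] for this synthetic ramp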