def test_drop_fields(self):
    # Test drop_fields
    a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
                 dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])

    # A basic field
    test = drop_fields(a, 'a')
    control = np.array([((2, 3.0),), ((5, 6.0),)],
                       dtype=[('b', [('ba', float), ('bb', int)])])
    assert_equal(test, control)

    # Another basic field (but nesting two fields)
    test = drop_fields(a, 'b')
    control = np.array([(1,), (4,)], dtype=[('a', int)])
    assert_equal(test, control)

    # A nested sub-field
    test = drop_fields(a, ['ba'])
    control = np.array([(1, (3.0,)), (4, (6.0,))],
                       dtype=[('a', int), ('b', [('bb', int)])])
    assert_equal(test, control)

    # All the nested sub-fields from a field: zap that field
    test = drop_fields(a, ['ba', 'bb'])
    control = np.array([(1,), (4,)], dtype=[('a', int)])
    assert_equal(test, control)

    test = drop_fields(a, ['a', 'b'])
    assert_(test is None)
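# ---------------------------------------------------------------------------
# Standalone sketch (not part of the test suite above) illustrating the
# behaviour the test exercises: drop_fields returns a new array without the
# named fields, handles nested sub-fields, and returns None once every field
# has been dropped. The array contents here are illustrative only.
import numpy as np
from numpy.lib.recfunctions import drop_fields

a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
             dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])
print(drop_fields(a, 'a').dtype.names)   # ('b',) -- top-level field removed
print(drop_fields(a, ['ba']).dtype)      # nested sub-field 'ba' removed from 'b'
print(drop_fields(a, ['a', 'b']))        # None -- nothing left to keep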
def _build_trajectories(data):
    """
    build_trajectories(data) is responsible for the bookkeeping of the
    trajectories, using the prev/next fields in the frames, creating a new
    set of data in the form of trajectories.
    """
    # first frame, initialization
    trajid = 0  # running trajid counter
    frame = data[0]
    frame.trajid = n.nan * n.empty_like(frame.x)
    ind = frame.next > -2
    # number of newly started trajectories is the number of True entries
    frame.trajid[ind] = range(trajid, trajid + ind.sum())
    trajid = trajid + ind.sum()

    for i, frame in enumerate(data[1:], start=1):
        frame.trajid = n.nan * n.empty_like(frame.x)
        old = frame.prev > -1
        frame.trajid[old] = data[i - 1].trajid[frame.prev[old]]
        ind = (frame.prev < 0) & (frame.next > -2)
        frame.trajid[ind] = range(trajid, trajid + ind.sum())
        trajid = trajid + ind.sum()
        # note: drop_fields returns a new array; the result is discarded here
        drop_fields(frame, ['prev', 'next'])

    for frame in data:
        frame = frame[~n.isnan(frame.trajid)]
        frame.t = frame.t * n.ones_like(frame.x)

    return data
def get_raw_chamber_data(self, filtered_data):
    # chamber_dtype = numpy.dtype([('time_secs', '<u4'),
    #                              ('time_nsecs', '<u4'),
    #                              ('time_rel', '<f4'),
    #                              ('status', '|S25'),
    #                              ('tunnel', '<u2'),
    #                              ('fly_x', '<f4'),
    #                              ('fly_y', '<f4'),
    #                              ('fly_angle', '<f4'),
    #                              ])
    header = list(FILE_TOOLS.chamber_dtype.names)
    tracking_chamber_data = filtered_data[filtered_data['status'] != 'Walk To End']
    tracking_chamber_data = tracking_chamber_data[header]
    tracking_chamber_data = tracking_chamber_data.astype(FILE_TOOLS.chamber_dtype)
    tracking_chamber_data['tunnel'] = tracking_chamber_data['tunnel'] + 1

    indicies = tracking_chamber_data['status'] == 'End Chamber Ethanol'
    raw_chamber_data_ethanol = tracking_chamber_data[indicies]
    raw_chamber_data_ethanol = recfunctions.drop_fields(raw_chamber_data_ethanol,
                                                        'status', usemask=False)
    status_array = numpy.array(['Ethanol'] * len(raw_chamber_data_ethanol), dtype='|S25')
    raw_chamber_data_ethanol = recfunctions.append_fields(raw_chamber_data_ethanol,
                                                          'status', status_array,
                                                          dtypes='|S25', usemask=False)
    raw_chamber_data = raw_chamber_data_ethanol
    ethanol_start_time = raw_chamber_data_ethanol['time_rel'][0]

    indicies = tracking_chamber_data['status'] == 'End Chamber Air'
    indicies &= tracking_chamber_data['time_rel'] < ethanol_start_time
    raw_chamber_data_air_before = tracking_chamber_data[indicies]
    raw_chamber_data_air_before = recfunctions.drop_fields(raw_chamber_data_air_before,
                                                           'status', usemask=False)
    status_array = numpy.array(['AirBefore'] * len(raw_chamber_data_air_before), dtype='|S25')
    raw_chamber_data_air_before = recfunctions.append_fields(raw_chamber_data_air_before,
                                                             'status', status_array,
                                                             dtypes='|S25', usemask=False)
    raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data_air_before, raw_chamber_data),
                                                 usemask=False)

    indicies = tracking_chamber_data['status'] == 'End Chamber Air'
    indicies &= tracking_chamber_data['time_rel'] > ethanol_start_time
    raw_chamber_data_air_after = tracking_chamber_data[indicies]
    raw_chamber_data_air_after = recfunctions.drop_fields(raw_chamber_data_air_after,
                                                          'status', usemask=False)
    status_array = numpy.array(['AirAfter'] * len(raw_chamber_data_air_after), dtype='|S25')
    raw_chamber_data_air_after = recfunctions.append_fields(raw_chamber_data_air_after,
                                                            'status', status_array,
                                                            dtypes='|S25', usemask=False)
    raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data, raw_chamber_data_air_after),
                                                 usemask=False)
    return raw_chamber_data
def mese_followup(**kwargs):
    livetime = 988.54
    livetime += 358.402
    livetime += 368.381

    print("\tLoading MESE with 2 follow-up years...")
    exp = np.append(np.load(os.path.join(path, "MESE_exp.npy")),
                    np.load(os.path.join(path, "MESE_followup_exp.npy")))
    mc = np.load(os.path.join(path, "MESE_MC.npy"))

    if "dist" in exp.dtype.names:
        exp = drop_fields(exp, ["dist"], usemask=False)
    if "dist" in mc.dtype.names:
        mc = drop_fields(mc, ["dist"], usemask=False)

    sinDec = kwargs.pop("sinDec", [-1., hem])
    exp = exp[(exp["sinDec"] > sinDec[0]) & (exp["sinDec"] < sinDec[-1])]
    mc = mc[(mc["sinDec"] > sinDec[0]) & (mc["sinDec"] < sinDec[-1])]

    dec_bins = np.unique(np.concatenate([
        np.linspace(-1., -0.93, 4 + 1),
        np.linspace(-0.93, hem, 12 + 1),
    ]))
    dec_bins = dec_bins[(dec_bins >= sinDec[0]) & (dec_bins <= sinDec[1])]
    dec_bins = np.unique(np.concatenate([sinDec, dec_bins]))

    dec_bins_logE = np.linspace(-1., hem, 4 + 1)
    dec_bins_logE = dec_bins_logE[(dec_bins_logE >= sinDec[0])
                                  & (dec_bins_logE <= sinDec[1])]
    dec_bins_logE = np.unique(np.concatenate([sinDec, dec_bins_logE]))

    energy_bins = [
        np.linspace(2., 8.5, 67 + 1),
        np.linspace(-1., hem, 4 + 1),
    ]

    mc = mc[mc["logE"] > 1.]

    llh_model = EnergyLLH(twodim_bins=energy_bins, sinDec_bins=dec_bins)

    if "upscale" in kwargs and kwargs["upscale"] is not None and (
            kwargs["upscale"] or not type(kwargs["upscale"]) == bool):
        lt = kwargs.pop("livetime", livetime)
        kwargs["upscale"] = (int(kwargs.pop("upscale")), lt)
    kwargs.pop("livetime", None)

    kwargs.setdefault("mode", "all")
    kwargs.setdefault("seed", 20101112)

    llh = StackingPointSourceLLH(exp, mc, livetime, llh_model=llh_model, **kwargs)

    print("{0:>80s}".format("[done]"))

    return llh
def ic86_2012_bdt(**kwargs):
    livetime = 331.88

    print("\tLoading IC86-II...")
    exp = np.load(os.path.join(path, "IC86-2012_exp.npy"))
    mc = np.load(os.path.join(path, "IC86-2012_MC.npy"))

    sinDec = kwargs.pop("sinDec", [-1., 1.])
    exp = exp[(exp["sinDec"] > sinDec[0]) & (exp["sinDec"] < sinDec[-1])]
    mc = mc[(mc["sinDec"] > sinDec[0]) & (mc["sinDec"] < sinDec[-1])]

    exp = drop_fields(exp, ["BDT2", "perc"])
    mc = drop_fields(mc, ["BDT2", "perc"])

    dec_bins = np.unique(np.concatenate([
        np.linspace(-1., -0.92, 5 + 1),
        np.linspace(-0.92, -0.15, 10 + 1),
        np.linspace(-0.15, 0.01, 10 + 1),
        np.linspace(0.01, 1., 20 + 1),
    ]))
    dec_bins = dec_bins[(dec_bins >= sinDec[0]) & (dec_bins <= sinDec[1])]
    dec_bins = np.unique(np.concatenate([sinDec, dec_bins]))

    X = np.concatenate([exp["BDT"], mc["BDT"]])
    bdt_bins = np.percentile(X, [0., 20., 40., 60., 80., 100.])

    energy_bdt_bins = [
        np.linspace(1., 10., 40 + 1),
        np.concatenate([[bdt_bins[0] - (bdt_bins[1] - bdt_bins[0])],
                        bdt_bins,
                        [bdt_bins[-1] + bdt_bins[-1] - bdt_bins[-2]]]),
        dec_bins
    ]

    llh_model = EnergyBDTLLH(bins=energy_bdt_bins, sinDec_bins=dec_bins)

    if "upscale" in kwargs and kwargs["upscale"] is not None and (
            kwargs["upscale"] or not type(kwargs["upscale"]) == bool):
        lt = kwargs.pop("livetime", livetime)
        kwargs["upscale"] = (int(kwargs.pop("upscale")), lt)
    kwargs.pop("livetime", None)

    kwargs.setdefault("seed", 2012)

    llh = StackingPointSourceLLH(exp, mc, livetime, llh_model=llh_model, **kwargs)

    print("{0:>80s}".format("[done]"))

    return llh
def setup_cart(results, classify, incl_unc=[], mass_min=0.05):
    """helper function for performing cart

    Parameters
    ----------
    results : tuple of structured array and dict with numpy arrays
              the return from :meth:`perform_experiments`.
    classify : string, function or callable
               either a string denoting the outcome of interest to use
               or a function.
    incl_unc : list of strings
    mass_min : float

    Raises
    ------
    TypeError
        if classify is not a string or a callable.

    """
    if not incl_unc:
        x = np.ma.array(results[0])
    else:
        drop_names = set(recfunctions.get_names(results[0].dtype)) - set(incl_unc)
        x = recfunctions.drop_fields(results[0], drop_names, asrecarray=True)

    if type(classify) == types.StringType:
        y = results[1][classify]
    elif callable(classify):
        y = classify(results[1])
    else:
        raise TypeError("unknown type for classify")

    return CART(x, y, mass_min)
def deepLosslessCompress(f, group):
    paths = findDatasets(f, group, "Events")
    paths = [path for path in paths if "Basecall" in path]
    # index event detection
    if "UniqueGlobalKey/channel_id" in f:
        sampleRate = f["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
        for path in paths:
            if f[path].parent.parent.attrs.__contains__("event_detection"):
                # index back to event detection
                dataset = f[path].value
                start = np.array([int(round(sampleRate * i)) for i in dataset["start"]])
                dataset = indexToZero(f, path, "start", dataColumn=start)
                # rewrite move dataset because it's int64 for max 2
                # otherwise, event by event
                move = dataset["move"]
                dataset = drop_fields(dataset, ["mean", "stdv", "length", "move"])
                dataset = append_fields(dataset, ["move"], [move], [getDtype(move)])
                rewriteDataset(f, path, compression="gzip", compression_opts=9,
                               dataset=dataset)
                # rewrite eventdetection too - start is also way too big here
                eventDetectionPath = findDatasets(
                    f, "all",
                    entry_point=f[path].parent.parent.attrs.get("event_detection"))[0]
                if "picopore.start_index" not in f[eventDetectionPath].attrs.keys():
                    eventData = indexToZero(f, eventDetectionPath, "start")
                    rewriteDataset(f, eventDetectionPath, compression="gzip",
                                   compression_opts=9, dataset=eventData)
    if __basegroup_name__ not in f:
        f.create_group(__basegroup_name__)
        for name, group in f.items():
            if name != __basegroup_name__:
                recursiveCollapseGroups(f, __basegroup_name__, name, group)
    return losslessCompress(f, group)
def __init__(self, x, y, mass_min=0.05, mode=sdutil.BINARY):
    ''' init '''
    x = recfunctions.drop_fields(x, "scenario_id", asrecarray=True)
    self.x = x
    self.y = y
    self.mass_min = mass_min
    self.mode = mode

    # we need to transform the structured array to a ndarray
    # we use dummy variables for each category in case of categorical
    # variables. Integers are treated as floats
    self.feature_names = []
    columns = []
    for unc, dtype in x.dtype.descr:
        dtype = x.dtype.fields[unc][0]
        if dtype == np.object:
            categories = sorted(list(set(x[unc])))
            for cat in categories:
                label = '{}{}{}'.format(unc, self.sep, cat)
                self.feature_names.append(label)
                columns.append(x[unc] == cat)
        else:
            self.feature_names.append(unc)
            columns.append(x[unc])

    self._x = np.column_stack(columns)
    self._boxes = None
    self._stats = None
def data_save(data, output_filename):
    # This isn't too hard, except we're going to put a copy of the
    # measures we actually care about at the beginning!
    names = list(data.dtype.names)

    # Find all the columns that have '_av_' in their title
    # and not '_mask'
    drop_names = [name for name in names
                  if (name.find('_av_') == -1) | (name.find('_mask') > 0)]
    drop_names.pop(0)

    important_data = rec.drop_fields(data, drop_names, usemask=False,
                                     asrecarray=True)

    names = list(important_data.dtype.names)

    # Strip the beginning part to get shorter, easier to manage variable names
    names[1:] = [name[6:] for name in names[1:]]
    names[1:] = [name[:(-8)] for name in names[1:]]
    names[1:] = [name[0].upper() + name[1:] + 'Cort' for name in names[1:]]
    names[0] = 'SubID'

    important_data.dtype.names = names

    # Create two temporary output_filenames:
    temp_filename1 = output_filename + '_temp1'
    temp_filename2 = output_filename + '_temp2'

    plt.rec2csv(data, temp_filename1, delimiter='\t', formatd=None, withheader=True)
    plt.rec2csv(important_data, temp_filename2, delimiter='\t', formatd=None, withheader=True)

    mcf.KW_paste(temp_filename2, temp_filename1, output_filename)
    mcf.KW_rmforce(temp_filename1)
    mcf.KW_rmforce(temp_filename2)
def produce_trial(
        analysis: Analysis,
        flux_norm: float = 0,
        random_seed: Optional[int] = None,
        n_signal_observed: Optional[int] = None,
        verbose: bool = False,
        **kwargs,
) -> np.ndarray:
    """Produces a single trial of background+signal events based on inputs.

    Args:
        analysis:
        flux_norm: A flux normalization to adjust weights.
        random_seed: A seed value for the numpy RNG.
        n_signal_observed:
        verbose: A flag to print progress.

    Returns:
        An array of combined signal and background events.
    """
    # kwargs no-op
    len(kwargs)

    if random_seed is not None:
        np.random.seed(random_seed)

    background = analysis.model.inject_background_events()
    background['time'] = analysis.model.scramble_times(background['time'])

    if flux_norm > 0 or n_signal_observed is not None:
        signal = analysis.model.inject_signal_events(
            flux_norm,
            n_signal_observed,
        )
        signal['time'] = analysis.model.scramble_times(
            signal['time'],
            background=False,
        )
    else:
        signal = np.empty(0, dtype=background.dtype)

    if verbose:
        print(f'number of background events: {len(background)}')
        print(f'number of signal events: {len(signal)}')

    # Because we want to return the entire event and not just the
    # number of events, we need to do some numpy magic. Specifically,
    # we need to remove the fields in the simulated events that are
    # not present in the data events. These include the true direction,
    # energy, and 'oneweight'.
    signal = rf.drop_fields(
        signal,
        [n for n in signal.dtype.names if n not in background.dtype.names])

    # Combine the signal and background events and time-sort them.
    # Use recfunctions.stack_arrays to prevent numpy from scrambling entry order
    events = rf.stack_arrays([background, signal], autoconvert=True)

    return events
def format_data(outcomes, experiments, var):
    x = experiments.astype(float)
    y = outcomes.ix[:, var].values
    x = x.to_records()
    x = recfunctions.drop_fields(x, 'index')

    results = (x, {'y': y})
    return results
def _prepare_experiments(experiments):
    '''
    transform the experiments structured array into a numpy array.

    Parameters
    ----------
    experiments : structured array

    Returns
    -------
    ndarray

    '''
    experiments = recfunctions.drop_fields(experiments, "scenario_id",
                                           asrecarray=True)
    uncs = recfunctions.get_names(experiments.dtype)

    temp_experiments = np.zeros((experiments.shape[0], len(uncs)))

    for i, u in enumerate(uncs):
        try:
            temp_experiments[:, i] = experiments[u].astype(np.float)
        except ValueError:
            data = experiments[u]
            entries = sorted(list(set(data)))

            for j, entry in enumerate(entries):
                temp_experiments[data == entry, i] = j

    return temp_experiments, uncs
def build_signal_TS(self, signal_trials=200, result=False, result_file=None):
    r'''Build the signal TS distribution.

    args:
        signal_trials: Number of trials.
        result: Whether to store the full result in self.result. Default is False.
        result_file: File in which to store the full result. Default is None.

    return:
        TS: The TS array
    '''
    TS = []
    ts_result = []
    for i in range(signal_trials):
        data = self.draw_data()
        signal = self.draw_signal()
        signal = rf.drop_fields(signal, [n for n in signal.dtype.names
                                         if n not in data.dtype.names])
        self.point_source.update_data(np.concatenate([data, signal]))
        TS.append(self.point_source.eval_llh_fit_ns()[1])
        ts_result.append(self.point_source.get_fit_result)
    if result:
        np.save(result_file, np.array(ts_result))
    return np.array(TS)
def rotate_struct(ev, ra, dec):
    r"""Wrapper around the rotate-method in skylab.utils for structured arrays.

    Parameters
    ----------
    ev : structured array
        Event information with ra, sinDec, plus true information
    ra, dec : float
        Coordinates to rotate the true direction onto

    Returns
    --------
    ev : structured array
        Array with rotated value, true information is deleted
    """
    names = ev.dtype.names

    rot = np.copy(ev)

    # Function call
    rot["ra"], rot_dec = rotate(ev["trueRa"], ev["trueDec"],
                                ra * np.ones(len(ev)), dec * np.ones(len(ev)),
                                ev["ra"], np.arcsin(ev["sinDec"]))

    if "dec" in names:
        rot["dec"] = rot_dec
    rot["sinDec"] = np.sin(rot_dec)

    # "delete" Monte Carlo information from sampled events
    mc = ["trueRa", "trueDec", "trueE", "ow"]

    return drop_fields(rot, mc)
def mergeTimeColumns(data, years, months, days, hours):
    datetimes = []
    hours2 = []
    for idx in range(data.shape[0]):
        datetimes.append(datetime.datetime(years[idx], months[idx], days[idx], hours[idx]))
        hours2.append((datetimes[idx] - datetimes[0]).total_seconds() / 3600.0)

    data = recfunctions.append_fields(data, data=datetimes, names="datetimes",
                                      dtypes='M8[us]')
    data = recfunctions.append_fields(data, data=hours2, names="hours",
                                      dtypes='int64')
    data = recfunctions.drop_fields(data, drop_names=('year', 'month', 'day', 'hour'))

    print " Data set after merging time columns:"
    print "%-20s %-20s %-20s %-40s" % ("\tColumn Name", "Number of Elements",
                                       "Data Type", "Nulls found")
    for name in data.dtype.names:
        print "%-20s %-20s %-20s %-40s" % ("\t" + name, str(len(data[name])),
                                           data[name].dtype,
                                           "and TODO number of nulls")
    return data
def appendFieldsToRecarray(recarray, data, fieldnames):
    """
    Return recarray with new fields appended, will override if exists.

    :param recarray: Recarray to append to
    :type recarray: list
    :param data: Data
    :type data: list or np.array
    :param fieldnames: Names of new column in numpy.recarray
    :type fieldnames: list of str
    :return: Recarray with new field appended
    :rtype: numpy.recarray
    """
    from numpy.lib.recfunctions import append_fields, drop_fields

    if isinstance(data, list):
        if recarray.size != len(data[0]):
            print("Warning: Cannot append array of size " + str(len(data)) +
                  " to recarray of size " + str(recarray.size))
            return recarray
    else:
        if recarray.size != data.size:
            print("Warning: Cannot append array of size " + str(data.size) +
                  " to recarray of size " + str(recarray.size))
            return recarray

    rec = drop_fields(recarray, fieldnames)
    dtypes = ['f4'] * len(fieldnames)
    rec = append_fields(rec, np.array(fieldnames), data, dtypes=dtypes,
                        asrecarray=True, usemask=False)
    return rec
def appendFieldToRecarray(recarray, data, fieldname):
    """
    Return recarray with new field appended, will override if exists.

    :param recarray: Recarray to append to
    :type recarray: list
    :param data: Data
    :type data: list
    :param fieldname: Name of new column in numpy.recarray
    :type fieldname: str
    :return: Recarray with new field appended
    :rtype: numpy.recarray
    """
    from numpy.lib.recfunctions import append_fields, drop_fields

    if recarray.size != data.size:
        printWarning("Cannot append array of size " + str(data.size) +
                     " to recarray of size " + str(recarray.size))
        return recarray

    rec = drop_fields(recarray, fieldname)
    rec = append_fields(rec, fieldname, data, dtypes='f4',
                        asrecarray=True, usemask=False)
    return rec
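# ---------------------------------------------------------------------------
# Hypothetical usage sketch for the helper above: overwriting an existing
# 'weight' column on a small recarray. The field names and values are
# illustrative only; the helper is assumed to be defined as shown above.
import numpy as np

rec = np.rec.array([(1, 0.5), (2, 0.25)], dtype=[('id', 'i4'), ('weight', 'f4')])
new_weights = np.array([0.6, 0.3], dtype='f4')
rec = appendFieldToRecarray(rec, new_weights, 'weight')  # old column dropped, new one appended
print(rec['weight'])   # [0.6, 0.3]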
def _rotate_subset(self, value, orig_experiments, logical):
    '''
    rotate a subset

    Parameters
    ----------
    value : list of strings
    orig_experiment : numpy structured array
    logical : boolean array

    '''
    list_dtypes = [(name, "<f8") for name in value]

    # cast everything to float
    drop_names = set(rf.get_names(orig_experiments.dtype)) - set(value)
    orig_subset = rf.drop_fields(orig_experiments, drop_names, asrecarray=True)
    subset_experiments = orig_subset.astype(list_dtypes).view('<f8').reshape(
        orig_experiments.shape[0], len(value))

    # normalize the data
    mean = np.mean(subset_experiments, axis=0)
    std = np.std(subset_experiments, axis=0)
    std[std == 0] = 1  # in order to avoid a division by zero
    subset_experiments = (subset_experiments - mean) / std

    # get the experiments of interest
    experiments_of_interest = subset_experiments[logical]

    # determine the rotation
    rotation_matrix = self._determine_rotation(experiments_of_interest)

    # apply the rotation
    subset_experiments = np.dot(subset_experiments, rotation_matrix)
    return rotation_matrix, subset_experiments
def merge_cort(data, cortisol_filename):
    cort_data = np.genfromtxt(cortisol_filename, dtype=None, names=True,
                              delimiter='\t')

    names = list(cort_data.dtype.names)

    # Find all the columns in cort_data that have 'av' in their title
    # and not '_mask'
    drop_names = names[8:]

    cort_data = nprf.drop_fields(cort_data, drop_names, usemask=False,
                                 asrecarray=True)

    data = nprf.join_by('SubID', data, cort_data, jointype='leftouter',
                        r1postfix='KW', r2postfix='KW2', usemask=False,
                        asrecarray=True)

    # Bizarrely, the join_by function pads with the biggest numbers it can
    # think of! So we're going to replace everything over 999 with 999
    for name in names[1:8]:
        data[name][data[name] > 999] = 999

    # Define a UsableCort field: 1 if ANY of the cortisol values are not 999
    cort_array = np.vstack([data[name] for name in names[1:8]])
    usable_cort_array = np.zeros(cort_array.shape[1])  # start at 0, flag usable rows below
    usable_cort_array[np.any(cort_array != 999, axis=0)] = 1

    data = nprf.append_fields(base=data, names='UsableCort',
                              data=usable_cort_array, usemask=False)

    return data
def DoJoin(balrog, row, size, odir, zz, names, end=None, cols=False, field=None):
    if not os.path.exists(odir):
        os.makedirs(odir)

    if end is None:
        end = row + len(zz)
    if end > size:
        end = size
    ee = end - row

    b = balrog[-1].read(rows=np.arange(row, end))
    d = []
    for name in names:
        d.append(zz[name][:ee])

    n = list(names)
    n.append('field')
    d.append(np.array([field] * len(b)))

    c = rec.append_fields(b, n, d)
    if 'table' in c.dtype.names:
        c = rec.drop_fields(c, 'table')

    ofile = os.path.join(odir, '%i-%i.fits' % (row, end))
    esutil.io.write(ofile, c, clobber=True)

    if cols:
        return end, c.dtype.names
    else:
        return end
def __init__(self, filename, date_sep='-', time_sep=':', format='stroke_DC3'):
    """ Load NLDN data from a file, into a numpy named array stored in the
        *data* attribute. *data*['time'] is relative to the *basedate*
        datetime attribute
    """
    self.format = format
    dtype_specs = getattr(self, format)

    nldn_initial = np.genfromtxt(filename, dtype=dtype_specs['columns'])
    date_part = np.genfromtxt(nldn_initial['date'], delimiter=date_sep,
                              dtype=dtype_specs['date_dtype'])
    time_part = np.genfromtxt(nldn_initial['time'], delimiter=time_sep,
                              dtype=dtype_specs['time_dtype'])
    dates = [datetime(a['year'], a['month'], a['day'], b['hour'], b['minute'])
             for a, b in zip(date_part, time_part)]
    min_date = min(dates)
    min_date = datetime(min_date.year, min_date.month, min_date.day)
    t = np.fromiter(((d - min_date).total_seconds() for d in dates),
                    dtype='float64')
    t += time_part['second']

    self.basedate = min_date

    data = drop_fields(nldn_initial, ('date', 'time'))
    data = append_fields(data, 'time', t)

    self.data = data
def read_positions():
    head, points1 = csv_parse.read('../Baltay-fibers_random.csv', delimiter=' ')
    head, points2 = csv_parse.read('../Baltay-fibers_residual.csv', delimiter=' ')
    points2 = rec.drop_fields(points2, ('r', 'theta'))
    points2['Number'] += 10000  # to distinguish them from the "randoms"
    points = np.hstack((points1, points2))
    return points
def remove_columns(self, col_names=None):
    '''
    This function will remove all the columns with names in col_names from
    all the datasets in self.columnar_data.

    Parameters
    ----------
    col_names : string or list
        The name or names of columns to be removed

    '''
    if col_names != None:

        if type(col_names) == str:
            col_names = [col_names]
        else:
            col_names = list(col_names)

        # Format column names
        col_names = ff.format_headers(col_names)

        removed_data = []
        for data in self.columnar_data:
            removed_data.append(drop_fields(data, col_names))

        self.columnar_data = removed_data
def read_originals():
    """Return the originally defined fiber positions, sorted by x and y."""
    head, points1 = csv_parse.read("../Baltay-fibers_random.csv", delimiter=" ")
    head, points2 = csv_parse.read("../Baltay-fibers_residual.csv", delimiter=" ")
    points2 = recfunc.drop_fields(points2, ("r", "theta"))
    points2["Number"] += 10000  # to distinguish them from the "randoms"
    return np.hstack((points1, points2))
def deepLosslessDecompress(f, group):
    # rebuild group hierarchy
    if __basegroup_name__ in f.keys():
        uncollapseGroups(f, f[__basegroup_name__])
    paths = findDatasets(f, group)
    paths = [path for path in paths if "Basecall" in path]
    sampleRate = f["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
    for path in paths:
        if f[path].parent.parent.attrs.__contains__("event_detection"):
            # index back to event detection
            dataset = f[path].value
            if "mean" not in dataset.dtype.names:
                eventDetectionPath = findDatasets(
                    f, "all",
                    entry_point=f[path].parent.parent.attrs.get("event_detection"))[0]
                eventData = f[eventDetectionPath].value
                try:
                    start = eventData["start"] + f[eventDetectionPath].attrs["picopore.start_index"]
                    del f[eventDetectionPath].attrs["picopore.start_index"]
                    eventData = drop_fields(eventData, ["start"])
                    eventData = append_fields(eventData, ["start"], [start], [getDtype(start)])
                    rewriteDataset(f, eventDetectionPath, compression="gzip",
                                   compression_opts=1, dataset=eventData)
                except KeyError:
                    # must have been compressed without start indexing
                    pass
                try:
                    start_index = f[path].attrs["picopore.start_index"]
                    del f[path].attrs["picopore.start_index"]
                except KeyError:
                    # must have been compressed without start indexing
                    start_index = 0
                start = dataset["start"][0] + start_index
                end = dataset["start"][-1] + start_index
                # constrain to range in basecall
                eventData = eventData[np.logical_and(eventData["start"] >= start,
                                                     eventData["start"] <= end)]
                # remove missing events
                i = 0
                keepIndex = []
                for time in dataset["start"]:
                    while eventData["start"][i] != time + start_index and i < eventData.shape[0]:
                        i += 1
                    keepIndex.append(i)
                eventData = eventData[keepIndex]
                dataset = drop_fields(dataset, "start")
                start = [i / sampleRate for i in eventData["start"]]
                length = [i / sampleRate for i in eventData["length"]]
                dataset = append_fields(dataset,
                                        ["mean", "start", "stdv", "length"],
                                        [eventData["mean"], start,
                                         eventData["stdv"], length])
                rewriteDataset(f, path, dataset=dataset)
    return losslessDecompress(f, group)
def load_arff(filename):
    data_struct = loadarff(filename)[0]
    # FIXME: field may not be named 'class'
    data_labels = data_struct['class']
    data = rfn.drop_fields(data_struct, 'class').view(
        np.float64).reshape(data_struct.shape + (-1,))
    return data, data_labels
def __update(s):
    # Remove inactive channels
    names = s.records.dtype.names
    s.records = rcf.drop_fields(s.records, drop_names=s.inactive)
    s.chans = [s.chans[i] for i in xrange(len(names))
               if names[i] not in s.inactive]
    s.__refresh_active()
def create_basecall_1d_output(raw_events, scale, path, model, post=None):
    """Create the annotated event table and basecalling summaries similar to
    chimaera.

    :param raw_events: :class:`np.ndarray` with fields mean, stdv, start,
        and length.
    :param scale: :class:`dragonet.basecall.scaling.Scaler` object (or object
        with attributes `shift`, `scale`, `drift`, `var`, `scale_sd`,
        `var_sd`, and `var_sd`).
    :param path: list containing state indices with respect to `model`.
    :param model: `:class:dragonet.util.model.Model` object.
    :param post: Two-dimensional :class:`np.ndarray` containing posteriors
        (event, state).
    :param quality_data: :class:np.ndarray Array containing quality_data,
        used to annotate events.

    :returns: A tuple of:
        * the annotated input event table
        * a dict of results
    """
    events = raw_events.copy()
    model_state = np.array(map(lambda x: model[x]['kmer'], path))
    raw_model_level = np.array(map(lambda x: model[x]['level_mean'], path))
    move = np.array(list(kmer_overlap_gen(model_state)))
    counts = np.bincount(move)
    stays = counts[0]
    skips = counts[2] if len(counts) > 2 else 0

    # Extend the event table
    read_start = events[0]['start']
    model_level = scale.shift + scale.scale * raw_model_level + \
        scale.drift * (events['start'] - read_start)
    new_columns = ['model_state', 'model_level', 'move']
    column_data = [model_state, model_level, move]
    if post is not None:
        weights = np.sum(post, axis=1)
        new_columns.append('weights')
        column_data.append(weights)
    drop_first = set(new_columns) & set(events.dtype.names)
    events = nprf.drop_fields(events, drop_first)
    table = nprf.append_fields(events, new_columns, data=column_data,
                               asrecarray=True)

    # Compile the results
    results = {
        'num_events': events.size,
        'called_events': events.size,
        'shift': scale.shift,
        'scale': scale.scale,
        'drift': scale.drift,
        'var': scale.var,
        'scale_sd': scale.scale_sd,
        'var_sd': scale.var_sd,
        'num_stays': stays,
        'num_skips': skips
    }

    return table, results
def convert(ifile):
    folder = "/lustre/scratch/astro/cs390/LGalaxies_Hen15_PublicRelease/MergerTrees/MR/treedata/"
    lastsnap = 63
    alistfile = "/lustre/scratch/astro/cs390/LGalaxies_Hen15_PublicRelease/input/zlists/zlist_MR.txt"
    f = h5py.File(folder + '/trees_' + str(ifile) + ".hdf5", 'w')

    # Version
    f.attrs.create('Version', 0, dtype=numpy.int32)
    # Subversion
    f.attrs.create('Subversion', 1, dtype=numpy.int32)
    # Title
    f.attrs.create('Title', "The Mighty Peter")
    # Description
    f.attrs.create('Description', "This is for testing")
    # BoxsizeMpc -- I'm not convinced that we should use Mpc instead Mpc/h
    # (It's quite difficult to remember), so I will use Mpc/h to avoid the
    # errors from myself
    f.attrs.create('BoxsizeMpc_h', 62.5, dtype=numpy.float32)
    # OmegaBaryon
    f.attrs.create('OmegaBaryon', 0.044, dtype=numpy.float32)
    # OmegaCDM
    f.attrs.create('OmegaCDM', 0.27 - 0.044, dtype=numpy.float32)
    # H100
    f.attrs.create('H100', 0.704, dtype=numpy.float32)
    # Sigma8
    f.attrs.create('Sigma8', 0.807, dtype=numpy.float32)

    # Group -- Snapshots
    snapshot_grp = f.create_group("Snapshots")
    (nsnaps, snapshot_data) = load_snapshot(alistfile)
    # NSnap
    print numpy.int32(nsnaps)
    snapshot_grp.attrs['NSnap'] = numpy.int32(nsnaps)
    # Snap
    snapshot_snap = snapshot_grp.create_dataset('Snap', data=snapshot_data)

    # Group -- MergerTrees
    mergertree_grp = f.create_group("MergerTrees")
    verbose = 1
    print "Reading tree", ifile
    (nTrees, nHalos, nTreeHalos, output_Halos, output_HaloIDs) = \
        read_lgal_input_fulltrees_withids(folder, lastsnap, ifile, verbose)
    print "Done reading tree", ifile
    # TableFlag
    mergertree_grp.attrs['TableFlag'] = numpy.int32(1)
    # NTree
    mergertree_grp.attrs['NTrees'] = numpy.int32(nTrees)
    # NHalo
    mergertree_grp.attrs['NHalos'] = numpy.int32(nHalos)
    # NHalosInTree
    nhalosintree_data = mergertree_grp.create_dataset('NHalosInTree',
                                                      data=nTreeHalos.astype(numpy.int32))
    # Halo
    print "Merging arrays"
    # halo = rfn.merge_arrays((output_Halos, output_HaloIDs), flatten=True, usemask=False)
    halo = join_struct_arrays((output_Halos, output_HaloIDs))
    print "Done merging arrays"
    halo = rfn.drop_fields(halo, ['dummy', 'PeanoKey'])
    print "Outputting merger trees"
    nhalosintree_data = mergertree_grp.create_dataset('Halo', data=halo)
    print "Done"
def plot_cdfs(x, y, ccdf=False):
    '''plot cumulative density functions for each column in x, based on
    the classification specified in y.

    Parameters
    ----------
    x : recarray
        the experiments to use in the cdfs
    y : ndarray
        the categorization for the data
    ccdf : bool, optional
           if true, plot a complementary cdf instead of a normal cdf.

    Returns
    -------
    a matplotlib Figure instance

    '''
    x = rf.drop_fields(x, "scenario_id", asrecarray=True)
    uncs = rf.get_names(x.dtype)
    cp = sns.color_palette()

    n_col = 4
    n_row = math.ceil(len(uncs) / n_col)
    size = 3
    aspect = 1
    figsize = n_col * size * aspect, n_row * size
    fig, axes = plt.subplots(n_row, n_col, figsize=figsize, squeeze=False)

    for i, unc in enumerate(uncs):
        discrete = False
        i_col = i % n_col
        i_row = i // n_col
        ax = axes[i_row, i_col]

        data = x[unc]
        if x.dtype[unc] == np.dtype('O'):
            discrete = True
        plot_individual_cdf(ax, unc, data, y, discrete, ccdf=ccdf)

    # last row might contain empty axis,
    # let's make them disappear
    for j_col in range(i_col + 1, n_col):
        ax = axes[i_row, j_col]
        ax.set_xticklabels([])
        ax.set_xticks([])
        ax.set_yticklabels([])
        ax.set_yticks([])

        sns.despine(ax=ax, top=True, right=True, left=True, bottom=True)

    proxies, labels = build_legend(x, y)
    fig.legend(proxies, labels, "upper center")

    return fig
def subj_by_subj_map_init(self, runs=2, verbose=-1, **map_kwargs):
    """
    initializing nodes by finding the MAP for each subject separately

    Input:
        runs - number of MAP runs for each subject
        map_kwargs - other arguments that will be passed on to the map function

    Note:
        This function should be run prior to the nodes creation, i.e. before
        running mcmc() or map()
    """
    # check if nodes were created. if they were, it causes problems for deepcopy
    assert (not self.nodes), "function should be used before nodes are initialized."

    # init
    subjs = self._subjs
    n_subjs = len(subjs)
    empty_s_model = deepcopy(self)
    empty_s_model.is_group_model = False
    del empty_s_model._num_subjs, empty_s_model._subjs, empty_s_model.data

    self.create_nodes()

    # loop over subjects
    for i_subj in range(n_subjs):
        # create and fit single subject
        if verbose > 1:
            print "*!*!* fitting subject %d *!*!*" % subjs[i_subj]
        t_data = self.data[self.data['subj_idx'] == subjs[i_subj]]
        t_data = rec.drop_fields(t_data, ['data_idx'])
        s_model = deepcopy(empty_s_model)
        s_model.data = t_data
        s_model.map(method='fmin_powell', runs=runs, **map_kwargs)

        # copy to original model
        for (name, node) in s_model.group_nodes.iteritems():
            self.subj_nodes[name][i_subj].value = node.value

    # set group and var nodes
    for (param_name, d) in self.params_dict.iteritems():
        for (tag, nodes) in d.subj_nodes.iteritems():
            subj_values = [x.value for x in nodes]
            # set group node
            if d.group_nodes:
                d.group_nodes[tag].value = np.mean(subj_values)
            # set var node
            if d.var_nodes:
                if d.var_type == 'std':
                    d.var_nodes[tag].value = np.std(subj_values)
                elif d.var_type == 'precision':
                    d.var_nodes[tag].value = np.std(subj_values) ** -2
                elif d.var_type == 'sample_size':
                    v = np.var(subj_values)
                    m = np.mean(subj_values)
                    d.var_nodes[tag].value = (m * (1 - m)) / v - 1
                else:
                    raise ValueError, "unknown var_type"
def plot_cdfs(x, y, ccdf=False):
    '''plot cumulative density functions for each column in x, based on
    the classification specified in y.

    Parameters
    ----------
    x : recarray
        the experiments to use in the cdfs
    y : ndarray
        the categorization for the data
    ccdf : bool, optional
           if true, plot a complementary cdf instead of a normal cdf.

    '''
    x = rf.drop_fields(x, "scenario_id", asrecarray=True)
    uncs = rf.get_names(x.dtype)
    cp = sns.color_palette()

    n_col = 4
    n_row = len(uncs) // n_col + 1
    size = 3
    aspect = 1
    figsize = n_col * size * aspect, n_row * size
    fig, axes = plt.subplots(n_row, n_col, figsize=figsize, squeeze=False)

    for i, unc in enumerate(uncs):
        discrete = False
        i_col = i % n_col
        i_row = i // n_col
        ax = axes[i_row, i_col]

        data = x[unc]
        if x.dtype[unc] == np.dtype('O'):
            discrete = True
        plot_cdf(ax, unc, data, y, discrete, ccdf=ccdf)

    # last row might contain empty axis,
    # let's make them disappear
    i_row = len(uncs) // n_col
    i_col = len(uncs) % n_col
    for i_col in range(i_col, n_col):
        ax = axes[i_row, i_col]
        ax.set_xticklabels([])
        ax.set_xticks([])
        ax.set_yticklabels([])
        ax.set_yticks([])

        sns.despine(ax=ax, top=True, right=True, left=True, bottom=True)

    proxies, labels = build_legend(x, y)
    fig.legend(proxies, labels, "upper center")

    return fig
def _convert_event_fields(self, read_events, sample_rate):
    """Convert event fields 'start' and 'length' from raw indices into times"""
    # convert event fields 'start' and 'length' from raw indices into times
    for col in ['start', 'length']:
        times = read_events[col] / sample_rate
        read_events = drop_fields(read_events, col, usemask=False)
        read_events = append_fields(read_events, col, times, usemask=False)
    return read_events
def read_array_info(entry):
    data = try_read(files.read_array_info, "array_info", entry.array_info)
    info = recfunctions.stack_arrays([
        build_detname(data.info.det_uid, entry),
        recfunctions.drop_fields(data.info, "det_uid"),
    ])
    return dataset.DataSet([
        dataset.DataField("array_info", data),
        dataset.DataField("entry", entry)])
def drop_extra_columns(self):
    """Remove any optional columns from this CopyNumArray.

    Returns a new copy with only the core columns retained:
        log2 value, chromosome, start, end, bin name.
    """
    result = self.__class__(self.sample_id)
    result.data = rfn.drop_fields(self.data, self._xtra)
    return result
def _add_channel_states(self, fh, meta):
    """Add mux, channel and channel states to meta, with special handling of
    mux 0. For mux 0, find out what the last well was (mux 1-4) and look for
    all states since the mux was set to zero.

    If with_mux_changes is True, add a table of all mux-change entries with
    times, and mux-state values (non-enumerated values, thus allowing for the
    distinction between e.g. 1:common_voltage_1 and 6:unblock_voltage_1,
    which both enumerate to well_id 1).
    """
    mux = fh.get_mux(self.channel, time=meta['start_time'])
    times = meta['start_time'], meta['start_time'] + meta['duration']
    if mux == 0:
        # find out what the last well was, and when it was set. Note the mux
        # could still be zero, if the well was off from the start of the run
        mux, mux_set_time = fh.get_mux(self.channel, time=meta['start_time'],
                                       wells_only=True, return_raw_index=True)
        mux_set_time = float(mux_set_time) / fh.sample_rate  # convert from raw index to time
        # look for any channel states which might have caused mux to change
        # to zero (e.g. saturated / multiple), i.e. look in time window from
        # mux change to end of read
        states = fh.get_states_in_window(
            self.channel,
            times=(mux_set_time, meta['start_time'] + meta['duration']))
    else:
        states = fh.get_states_in_window(self.channel, times=times)

    mux_changes = fh.get_mux_changes_in_window(self.channel, times=times)
    # ensure 'well_id' changes between rows of the mux_changes struct array
    mux_changes = get_changes(mux_changes, use_cols=('well_id',))
    logger.debug('mux changes from {} to {}: {}'.format(times[0], times[1],
                                                        mux_changes))
    change_times = mux_changes['approx_raw_index'] / fh.sample_rate
    mux_changes = drop_fields(mux_changes, 'approx_raw_index', usemask=False)
    mux_changes = append_fields(mux_changes, 'time', change_times, usemask=False)

    meta.update({
        'mux': mux,
        'states': states,
        'channel': self.channel,
        'mux_changes': mux_changes,
        'bias_voltage_changes': fh.get_bias_voltage_changes_in_window(times=times)
    })
def read_targets_in_box(hpdirname, radecbox=[0., 360., -90., 90.],
                        columns=None):
    """Read in targets in an RA/Dec box.

    Parameters
    ----------
    hpdirname : :class:`str`
        Full path to either a directory containing targets that
        have been partitioned by HEALPixel (i.e. as made by
        `select_targets` with the `bundle_files` option). Or the
        name of a single file of targets.
    radecbox : :class:`list`, defaults to the entire sky
        4-entry list of coordinates [ramin, ramax, decmin, decmax]
        forming the edges of a box in RA/Dec (degrees).
    columns : :class:`list`, optional
        Only read in these target columns.

    Returns
    -------
    :class:`~numpy.ndarray`
        An array of targets in the passed RA/Dec box.
    """
    # ADM we'll need RA/Dec for final cuts, so ensure they're read.
    addedcols = []
    columnscopy = None
    if columns is not None:
        # ADM make a copy of columns, as it's a kwarg we'll modify.
        columnscopy = columns.copy()
        for radec in ["RA", "DEC"]:
            if radec not in columnscopy:
                columnscopy.append(radec)
                addedcols.append(radec)

    # ADM if a directory was passed, do fancy HEALPixel parsing...
    if os.path.isdir(hpdirname):
        # ADM approximate nside for area of passed box.
        nside = pixarea2nside(box_area(radecbox))

        # ADM HEALPixels that touch the box for that nside.
        pixlist = hp_in_box(nside, radecbox)

        # ADM read in targets in these HEALPixels.
        targets = read_targets_in_hp(hpdirname, nside, pixlist,
                                     columns=columnscopy)
    # ADM ...otherwise just read in the targets.
    else:
        targets = fitsio.read(hpdirname, columns=columnscopy)

    # ADM restrict only to targets in the requested RA/Dec box...
    ii = is_in_box(targets, radecbox)
    # ADM ...and remove RA/Dec columns if we added them.
    targets = rfn.drop_fields(targets[ii], addedcols)

    return targets
def updateNames(self, rename: dict) -> None:
    datas = self.field("laserdata")
    for i in range(len(datas)):
        remove = [name for name in datas[i].dtype.names if name not in rename]
        datas[i] = rfn.drop_fields(datas[i], remove, usemask=False)
        datas[i] = rfn.rename_fields(datas[i], rename)

    self.setField("laserdata", datas)
    self.setElidedNames(datas[0].dtype.names)
def VecAssoc2BalrogIndex(header, ndata, label, index_key='balrog_index'):
    pos = None
    for name in header.keys():
        if header[name] == index_key:
            pos = int(name[1:])
            break
    if pos != None:
        if label != 'des':
            index = ndata['VECTOR_ASSOC'][:, pos]
            ndata = recfunctions.append_fields(ndata, index_key, index, usemask=False)
        ndata = recfunctions.drop_fields(ndata, 'VECTOR_ASSOC', usemask=False)
    return ndata
def test_stack():
    rec = rnp.root2rec(load('test.root'))
    s = rnp.stack([rec, rec])
    assert_equal(s.shape[0], 2 * rec.shape[0])
    assert_equal(s.dtype.names, rec.dtype.names)
    s = rnp.stack([rec, rec], fields=['x', 'y'])
    assert_equal(s.shape[0], 2 * rec.shape[0])
    assert_equal(s.dtype.names, ('x', 'y'))
    # recs don't have identical fields
    rec2 = recfunctions.drop_fields(rec, ['i', 'x'])
    s = rnp.stack([rec, rec2])
    assert_equal(set(s.dtype.names), set(['y', 'z']))
def classify_line(filename, classifier):
    """ Use `classifier` to classify data stored in `filename`

    Args:
        filename (str): filename of stored results
        classifier (sklearn classifier): pre-trained classifier

    """
    z = np.load(filename)
    rec = z['record']

    if rec.shape[0] == 0:
        logger.debug('No records in {f}. Continuing'.format(f=filename))
        return

    # Rescale intercept term
    coef = rec['coef'].copy()  # copy so we don't transform npz coef
    coef[:, 0, :] = (coef[:, 0, :] + coef[:, 1, :] *
                     ((rec['start'] + rec['end']) / 2.0)[:, np.newaxis])

    # Include RMSE for full X matrix
    newdim = (coef.shape[0], coef.shape[1] * coef.shape[2])
    X = np.hstack((coef.reshape(newdim), rec['rmse']))

    # Create output and classify
    classes = classifier.classes_
    classified = np.zeros(rec.shape[0], dtype=[
        ('class', 'u2'),
        ('class_proba', 'float32', classes.size)
    ])
    classified['class'] = classifier.predict(X)
    classified['class_proba'] = classifier.predict_proba(X)

    # Replace with new classification if exists, or add by merging
    if ('class' in rec.dtype.names and 'class_proba' in rec.dtype.names
            and rec['class_proba'].shape[1] == classes.size):
        rec['class'] = classified['class']
        rec['class_proba'] = classified['class_proba']
    else:
        # Drop incompatible classified results if needed
        # e.g., if the number of classes changed
        if 'class' in rec.dtype.names and 'class_proba' in rec.dtype.names:
            rec = nprfn.drop_fields(rec, ['class', 'class_proba'])
        rec = nprfn.merge_arrays((rec, classified), flatten=True)

    # Create dict for re-saving `npz` file (only way to append)
    out = {}
    for k, v in z.iteritems():
        out[k] = v
    out['classes'] = classes
    out['record'] = rec

    np.savez(filename, **out)
def assign_and_drop(sam_ev, inj_ra, inj_dec):
    r"""
    Assign sampled ra/dec positions and drop mc fields from injected sample.
    This replaces the rotate function in the PointSourceInjector class.
    """
    # Assign sampled locations from src map.
    sam_ev["ra"] = inj_ra
    sam_ev["dec"] = inj_dec

    # Drop MC fields from the injected events
    mc_names = ['ow', 'trueDec', 'trueE', 'trueRa']
    return drop_fields(sam_ev, mc_names)
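# ---------------------------------------------------------------------------
# Hypothetical usage of assign_and_drop on a tiny injected-event array. The
# field names follow the skylab-style convention used above; the values are
# made up. The returned array keeps only the reconstructed quantities.
import numpy as np

sam_ev = np.zeros(2, dtype=[('ra', 'f8'), ('dec', 'f8'), ('ow', 'f8'),
                            ('trueRa', 'f8'), ('trueDec', 'f8'), ('trueE', 'f8')])
out = assign_and_drop(sam_ev, inj_ra=1.0, inj_dec=-0.5)
print(out.dtype.names)   # ('ra', 'dec') -- MC truth fields removed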
def split_up_data_by_field(self, split_columns=None):
    '''
    This function will take in the split-columns list and split the data into
    separate arrays based on the list. For example, if one were to pass in
    dbh1, dbh2, dbh3, three copies of the data would be made, each being
    identical except that each would only contain one of the instances of
    dbh. One could also pass [(dbh1, recr1), (dbh2, recr2), (dbh3, recr3)].
    All other fields in split_columns will be excluded other than the fields
    within the tuple under consideration.

    Parameters
    ----------
    split_columns : list
        a list of tuples specifying the columns by which to split the array

    Notes
    -----
    Saves the split array as self.columnar_data.

    '''
    # Note: If they enter the wrong column name nothing will be removed
    # Should I error check for this?
    if split_columns != None:

        # Check if split_columns is a list of strings. If so, change it
        # into a list of tuples
        split_columns = [(s,) if type(s) == str else tuple(s)
                         for s in split_columns]

        # Format the names in each tuple
        split_columns = [tuple(ff.format_headers(nms)) for nms in split_columns]

        split_data = []
        given_col_names = []
        for tup in split_columns:
            for name in tup:
                given_col_names.append(name)
        given_col_names = np.array(given_col_names)

        for data in self.columnar_data:
            for tup in split_columns:
                ind = np.ones(len(given_col_names), dtype=bool)
                for name in tup:
                    ind = np.bitwise_and((name != given_col_names), ind)
                remove_names = given_col_names[ind]
                split_data.append(drop_fields(data, list(remove_names)))

        self.columnar_data = split_data
def drop_columns(self, colnames, **kwargs):
    """Drop columns from the table.

    See the docs for ``numpy.lib.recfunctions.drop_fields`` for an
    explanation of the remaining options.

    """
    new_arr = rfn.drop_fields(self, colnames, usemask=False,
                              asrecarray=True, **kwargs)
    return self.__class__(
        new_arr,
        h5loc=self.h5loc,
        split_h5=self.split_h5,
        name=self.name,
        h5singleton=self.h5singleton
    )
def remove_cols(M, col_names):
    """Remove columns specified by col_names from structured array

    Parameters
    ----------
    M : numpy.ndarray
        structured array
    col_names : list of str
        names for columns to remove

    Returns
    -------
    numpy.ndarray
        structured array without columns

    """
    M, col_names = check_consistent(M, col_names=col_names)
    return nprf.drop_fields(M, col_names, usemask=False)
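# ---------------------------------------------------------------------------
# Hypothetical usage of remove_cols (the column names are illustrative).
# This assumes check_consistent from the same module simply validates and
# returns its inputs unchanged.
import numpy as np

M = np.array([(1, 2.0, 'x'), (3, 4.0, 'y')],
             dtype=[('id', 'i4'), ('val', 'f8'), ('tag', 'U1')])
M2 = remove_cols(M, ['tag'])
print(M2.dtype.names)   # ('id', 'val')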
def evaluate_population_outcome(population, ri, toolbox, ensemble):
    '''
    Helper function for evaluating a population in case of outcome optimization

    Parameters
    ----------
    population : list
                 the population to evaluate
    ri : int
         reporting interval
    toolbox : deap toolbox instance
    ensemble : ModelEnsemble instance
               the ensemble instance running the optimization

    '''
    cases = [dict(member) for member in population]

    experiments, outcomes = ensemble.perform_experiments(cases,
                                                         reporting_interval=ri)

    # TODO:: model and policy should actually stay in; that would make it
    # possible to also look across policies and models for the optimum. So
    # you would have to add all models and all policies to x by default and
    # only then look up the index.
    # That does add two extra nested loops, though...
    experiments = recfunctions.drop_fields(experiments,
                                           drop_names=['model', 'policy'],
                                           asrecarray=True)

    ordering = [entry[0] for entry in experiments.dtype.descr]
    experiments = experiments.tolist()
    indices = {tuple(experiments[i]): i for i in range(len(experiments))}

    # we need to map the outcomes of the x back to the
    # correct individual
    for member in population:
        index = tuple([member[entry] for entry in ordering])
        associated_index = indices[index]

        member_outcomes = {}
        for key, value in outcomes.items():
            member_outcomes[key] = value[associated_index]

        member.fitness.values = toolbox.evaluate(member_outcomes)