Example no. 1
    def test_drop_fields(self):
        # Test drop_fields
        a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
                     dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])

        # A basic field
        test = drop_fields(a, 'a')
        control = np.array([((2, 3.0), ), ((5, 6.0), )],
                           dtype=[('b', [('ba', float), ('bb', int)])])
        assert_equal(test, control)

        # Another basic field (but nesting two fields)
        test = drop_fields(a, 'b')
        control = np.array([(1, ), (4, )], dtype=[('a', int)])
        assert_equal(test, control)

        # A nested sub-field
        test = drop_fields(a, [
            'ba',
        ])
        control = np.array([(1, (3.0, )), (4, (6.0, ))],
                           dtype=[('a', int), ('b', [('bb', int)])])
        assert_equal(test, control)

        # All the nested sub-field from a field: zap that field
        test = drop_fields(a, ['ba', 'bb'])
        control = np.array([(1, ), (4, )], dtype=[('a', int)])
        assert_equal(test, control)

        test = drop_fields(a, ['a', 'b'])
        assert_(test is None)
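The test above pins down the drop_fields behavior that the examples below rely on: dropping a top-level field, dropping a nested sub-field, and getting None back when nothing is left. A minimal standalone sketch of those same calls (assumes only numpy; the variable names are illustrative):

import numpy as np
from numpy.lib.recfunctions import drop_fields

a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
             dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])

# Dropping a top-level field leaves the remaining fields untouched.
print(drop_fields(a, 'a').dtype.names)          # ('b',)

# Dropping a nested sub-field rewrites the parent compound dtype.
print(drop_fields(a, ['ba'])['b'].dtype.names)  # ('bb',)

# Dropping every field returns None rather than an empty array.
print(drop_fields(a, ['a', 'b']))               # None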
def _build_trajectories(data):
    # """
    # build_trajectories(data) is responsible for the book keeping of the trajectories,
    # using the prev,next fields in the frames,creating new set of data, in the form of trajectories
    # """
    # first frame, initialization
    trajid = 0  # running trajid counter
    frame = data[0]
    frame.trajid = n.nan * n.empty_like(frame.x)
    ind = frame.next > -2
    frame.trajid[ind] = range(trajid, trajid + ind.sum())  # one new id per selected particle
    trajid = trajid + ind.sum()

    for i, frame in enumerate(data[1:], start=1):
        frame.trajid = n.nan * n.empty_like(frame.x)
        old = frame.prev > -1
        frame.trajid[old] = data[i - 1].trajid[frame.prev[old]]
        ind = (frame.prev < 0) & (frame.next > -2)
        frame.trajid[ind] = range(trajid, trajid + ind.sum())
        trajid = trajid + ind.sum()
        drop_fields(frame, ['prev', 'next'])

    for frame in data:
        frame = frame[~n.isnan(frame)]
        frame.t = frame.t * n.ones_like(frame.x)

    return data
Example no. 3
    def test_drop_fields(self):
        # Test drop_fields
        a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=[("a", int), ("b", [("ba", float), ("bb", int)])])

        # A basic field
        test = drop_fields(a, "a")
        control = np.array([((2, 3.0),), ((5, 6.0),)], dtype=[("b", [("ba", float), ("bb", int)])])
        assert_equal(test, control)

        # Another basic field (but nesting two fields)
        test = drop_fields(a, "b")
        control = np.array([(1,), (4,)], dtype=[("a", int)])
        assert_equal(test, control)

        # A nested sub-field
        test = drop_fields(a, ["ba"])
        control = np.array([(1, (3.0,)), (4, (6.0,))], dtype=[("a", int), ("b", [("bb", int)])])
        assert_equal(test, control)

        # All the nested sub-field from a field: zap that field
        test = drop_fields(a, ["ba", "bb"])
        control = np.array([(1,), (4,)], dtype=[("a", int)])
        assert_equal(test, control)

        test = drop_fields(a, ["a", "b"])
        assert_(test is None)
Example no. 4
    def test_drop_fields(self):
        # Test drop_fields
        a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
                     dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])

        # A basic field
        test = drop_fields(a, 'a')
        control = np.array([((2, 3.0),), ((5, 6.0),)],
                           dtype=[('b', [('ba', float), ('bb', int)])])
        assert_equal(test, control)

        # Another basic field (but nesting two fields)
        test = drop_fields(a, 'b')
        control = np.array([(1,), (4,)], dtype=[('a', int)])
        assert_equal(test, control)

        # A nested sub-field
        test = drop_fields(a, ['ba', ])
        control = np.array([(1, (3.0,)), (4, (6.0,))],
                           dtype=[('a', int), ('b', [('bb', int)])])
        assert_equal(test, control)

        # All the nested sub-field from a field: zap that field
        test = drop_fields(a, ['ba', 'bb'])
        control = np.array([(1,), (4,)], dtype=[('a', int)])
        assert_equal(test, control)

        test = drop_fields(a, ['a', 'b'])
        assert_(test is None)
    def test_drop_fields(self):
        # Test drop_fields
        a = np.array(
            [(1, (2, 3.0)), (4, (5, 6.0))],
            dtype=[("a", int), ("b", [("ba", float), ("bb", int)])],
        )

        # A basic field
        test = drop_fields(a, "a")
        control = np.array([((2, 3.0), ), ((5, 6.0), )],
                           dtype=[("b", [("ba", float), ("bb", int)])])
        assert_equal(test, control)

        # Another basic field (but nesting two fields)
        test = drop_fields(a, "b")
        control = np.array([(1, ), (4, )], dtype=[("a", int)])
        assert_equal(test, control)

        # A nested sub-field
        test = drop_fields(a, [
            "ba",
        ])
        control = np.array([(1, (3.0, )), (4, (6.0, ))],
                           dtype=[("a", int), ("b", [("bb", int)])])
        assert_equal(test, control)

        # All the nested sub-field from a field: zap that field
        test = drop_fields(a, ["ba", "bb"])
        control = np.array([(1, ), (4, )], dtype=[("a", int)])
        assert_equal(test, control)

        test = drop_fields(a, ["a", "b"])
        assert_(test is None)
def _build_trajectories(data):
    """
    _build_trajectories(data) does the bookkeeping for the trajectories:
    using the prev/next fields in the frames, it creates a new set of data
    in the form of trajectories.
    """
    # first frame, initialization
    trajid = 0  # running trajid counter
    frame = data[0]
    frame.trajid = n.nan * n.empty_like(frame.x)
    ind = frame.next > -2
    frame.trajid[ind] = range(trajid, trajid + ind.sum())
    trajid = trajid + ind.sum()

    for i, frame in enumerate(data[1:], start=1):
        frame.trajid = n.nan * n.empty_like(frame.x)
        old = frame.prev > -1
        frame.trajid[old] = data[i - 1].trajid[frame.prev[old]]
        ind = (frame.prev < 0) & (frame.next > -2)
        frame.trajid[ind] = range(trajid, trajid + ind.sum())
        trajid = trajid + ind.sum()
        drop_fields(frame, ['prev', 'next'])

    for frame in data:
        frame = frame[~n.isnan(frame)]
        frame.t = frame.t * n.ones_like(frame.x)

    return data
    def get_raw_chamber_data(self,filtered_data):
        # chamber_dtype = numpy.dtype([('time_secs', '<u4'),
        #                              ('time_nsecs', '<u4'),
        #                              ('time_rel', '<f4'),
        #                              ('status', '|S25'),
        #                              ('tunnel', '<u2'),
        #                              ('fly_x', '<f4'),
        #                              ('fly_y', '<f4'),
        #                              ('fly_angle', '<f4'),
        #                              ])
        header = list(FILE_TOOLS.chamber_dtype.names)
        tracking_chamber_data = filtered_data[filtered_data['status'] != 'Walk To End']
        tracking_chamber_data = tracking_chamber_data[header]
        tracking_chamber_data = tracking_chamber_data.astype(FILE_TOOLS.chamber_dtype)
        tracking_chamber_data['tunnel'] = tracking_chamber_data['tunnel']+1
        indicies = tracking_chamber_data['status'] == 'End Chamber Ethanol'
        raw_chamber_data_ethanol = tracking_chamber_data[indicies]
        raw_chamber_data_ethanol = recfunctions.drop_fields(raw_chamber_data_ethanol,
                                                            'status',
                                                            usemask=False)
        status_array = numpy.array(['Ethanol']*len(raw_chamber_data_ethanol),dtype='|S25')
        raw_chamber_data_ethanol = recfunctions.append_fields(raw_chamber_data_ethanol,
                                                              'status',
                                                              status_array,
                                                              dtypes='|S25',
                                                              usemask=False)
        raw_chamber_data = raw_chamber_data_ethanol

        ethanol_start_time = raw_chamber_data_ethanol['time_rel'][0]
        indicies = tracking_chamber_data['status'] == 'End Chamber Air'
        indicies &= tracking_chamber_data['time_rel'] < ethanol_start_time
        raw_chamber_data_air_before = tracking_chamber_data[indicies]
        raw_chamber_data_air_before = recfunctions.drop_fields(raw_chamber_data_air_before,
                                                               'status',
                                                               usemask=False)
        status_array = numpy.array(['AirBefore']*len(raw_chamber_data_air_before),dtype='|S25')
        raw_chamber_data_air_before = recfunctions.append_fields(raw_chamber_data_air_before,
                                                                 'status',
                                                                 status_array,
                                                                 dtypes='|S25',
                                                                 usemask=False)
        raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data_air_before,raw_chamber_data),usemask=False)

        indicies = tracking_chamber_data['status'] == 'End Chamber Air'
        indicies &= tracking_chamber_data['time_rel'] > ethanol_start_time
        raw_chamber_data_air_after = tracking_chamber_data[indicies]
        raw_chamber_data_air_after = recfunctions.drop_fields(raw_chamber_data_air_after,
                                                               'status',
                                                               usemask=False)
        status_array = numpy.array(['AirAfter']*len(raw_chamber_data_air_after),dtype='|S25')
        raw_chamber_data_air_after = recfunctions.append_fields(raw_chamber_data_air_after,
                                                                 'status',
                                                                 status_array,
                                                                 dtypes='|S25',
                                                                 usemask=False)
        raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data,raw_chamber_data_air_after),usemask=False)

        return raw_chamber_data
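The Ethanol/AirBefore/AirAfter blocks above all relabel the 'status' column the same way: drop the field, then append a constant-valued replacement. A hedged sketch of that drop-then-append relabelling as a small helper (the helper name is illustrative, not part of the original module):

import numpy
from numpy.lib import recfunctions

def relabel_status(rec, new_status, dtype='|S25'):
    # Replace the 'status' column with a constant label of the given dtype.
    rec = recfunctions.drop_fields(rec, 'status', usemask=False)
    status = numpy.array([new_status] * len(rec), dtype=dtype)
    return recfunctions.append_fields(rec, 'status', status,
                                      dtypes=dtype, usemask=False)

# usage: relabel_status(some_chamber_records, 'Ethanol')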
Example no. 8
def mese_followup(**kwargs):
    livetime = 988.54
    livetime += 358.402
    livetime += 368.381
    print("\tLoading MESE with 2 follow-up years...")
    exp = np.append(np.load(os.path.join(path, "MESE_exp.npy")),
                    np.load(os.path.join(path, "MESE_followup_exp.npy")))

    mc = np.load(os.path.join(path, "MESE_MC.npy"))

    if "dist" in exp.dtype.names:
        exp = drop_fields(exp, ["dist"], usemask=False)
    if "dist" in mc.dtype.names:
        mc = drop_fields(mc, ["dist"], usemask=False)

    sinDec = kwargs.pop("sinDec", [-1., hem])
    exp = exp[(exp["sinDec"] > sinDec[0]) & (exp["sinDec"] < sinDec[-1])]
    mc = mc[(mc["sinDec"] > sinDec[0]) & (mc["sinDec"] < sinDec[-1])]

    dec_bins = np.unique(
        np.concatenate([
            np.linspace(-1., -0.93, 4 + 1),
            np.linspace(-0.93, hem, 12 + 1),
        ]))

    dec_bins = dec_bins[(dec_bins >= sinDec[0]) & (dec_bins <= sinDec[1])]
    dec_bins = np.unique(np.concatenate([sinDec, dec_bins]))
    dec_bins_logE = np.linspace(-1., hem, 4 + 1)
    dec_bins_logE = dec_bins_logE[(dec_bins_logE >= sinDec[0])
                                  & (dec_bins_logE <= sinDec[1])]
    dec_bins_logE = np.unique(np.concatenate([sinDec, dec_bins_logE]))

    energy_bins = [
        np.linspace(2., 8.5, 67 + 1),
        np.linspace(-1., hem, 4 + 1),
    ]
    mc = mc[mc["logE"] > 1.]
    llh_model = EnergyLLH(twodim_bins=energy_bins, sinDec_bins=dec_bins)

    if "upscale" in kwargs and kwargs["upscale"] is not None and (
            kwargs["upscale"] or not type(kwargs["upscale"]) == bool):
        lt = kwargs.pop("livetime", livetime)
        kwargs["upscale"] = (int(kwargs.pop("upscale")), lt)

    kwargs.pop("livetime", None)
    kwargs.setdefault("mode", "all")
    kwargs.setdefault("seed", 20101112)
    llh = StackingPointSourceLLH(exp,
                                 mc,
                                 livetime,
                                 llh_model=llh_model,
                                 **kwargs)

    print("{0:>80s}".format("[done]"))
    return llh
Example no. 9
def ic86_2012_bdt(**kwargs):
    livetime = 331.88
    print("\tLoading IC86-II...")
    exp = np.load(os.path.join(path, "IC86-2012_exp.npy"))
    mc = np.load(os.path.join(path, "IC86-2012_MC.npy"))

    sinDec = kwargs.pop("sinDec", [-1., 1.])
    exp = exp[(exp["sinDec"] > sinDec[0]) & (exp["sinDec"] < sinDec[-1])]
    mc = mc[(mc["sinDec"] > sinDec[0]) & (mc["sinDec"] < sinDec[-1])]

    exp = drop_fields(exp, ["BDT2", "perc"])
    mc = drop_fields(mc, ["BDT2", "perc"])

    dec_bins = np.unique(
        np.concatenate([
            np.linspace(-1., -0.92, 5 + 1),
            np.linspace(-0.92, -0.15, 10 + 1),
            np.linspace(-0.15, 0.01, 10 + 1),
            np.linspace(0.01, 1., 20 + 1),
        ]))
    dec_bins = dec_bins[(dec_bins >= sinDec[0]) & (dec_bins <= sinDec[1])]
    dec_bins = np.unique(np.concatenate([sinDec, dec_bins]))

    X = np.concatenate([exp["BDT"], mc["BDT"]])
    bdt_bins = np.percentile(X, [0., 20., 40., 60., 80., 100.])

    energy_bdt_bins = [
        np.linspace(1., 10., 40 + 1),
        np.concatenate([[bdt_bins[0] - (bdt_bins[1] - bdt_bins[0])], bdt_bins,
                        [bdt_bins[-1] + bdt_bins[-1] - bdt_bins[-2]]]),
        dec_bins
    ]

    llh_model = EnergyBDTLLH(bins=energy_bdt_bins, sinDec_bins=dec_bins)

    if "upscale" in kwargs and kwargs["upscale"] is not None and (
            kwargs["upscale"] or not type(kwargs["upscale"]) == bool):
        lt = kwargs.pop("livetime", livetime)
        kwargs["upscale"] = (int(kwargs.pop("upscale")), lt)

    kwargs.pop("livetime", None)

    kwargs.setdefault("seed", 2012)

    llh = StackingPointSourceLLH(exp,
                                 mc,
                                 livetime,
                                 llh_model=llh_model,
                                 **kwargs)

    print("{0:>80s}".format("[done]"))
    return llh
Example no. 10
def setup_cart(results, classify, incl_unc=[], mass_min=0.05):
    """helper function for performing cart
    
    Parameters
    ----------
    results : tuple of structured array and dict with numpy arrays
              the return from :meth:`perform_experiments`.
    classify : string, function or callable
               either a string denoting the outcome of interest to 
               use or a function. 
    incl_unc : list of strings
    mass_min : float
    
    
    Raises
    ------
    TypeError 
        if classify is not a string or a callable.
    
    """
    
    if not incl_unc:
        x = np.ma.array(results[0])
    else:
        drop_names = set(recfunctions.get_names(results[0].dtype))-set(incl_unc)
        x = recfunctions.drop_fields(results[0], drop_names, asrecarray = True)
    if type(classify)==types.StringType:
        y = results[1][classify]
    elif callable(classify):
        y = classify(results[1])
    else:
        raise TypeError("unknown type for classify")
    
    return CART(x, y, mass_min)
Example no. 11
def deepLosslessCompress(f, group):
	paths = findDatasets(f, group, "Events")
	paths = [path for path in paths if "Basecall" in path]
	# index event detection
	if "UniqueGlobalKey/channel_id" in f:
		sampleRate = f["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
		for path in paths:
			if f[path].parent.parent.attrs.__contains__("event_detection"):
				# index back to event detection
				dataset = f[path].value
				start = np.array([int(round(sampleRate * i)) for i in dataset["start"]])
				dataset = indexToZero(f, path, "start", dataColumn=start)
				move = dataset["move"] # rewrite move dataset because it's int64 for max 2
				# otherwise, event by event
				dataset = drop_fields(dataset, ["mean", "stdv", "length", "move"])
				dataset = append_fields(dataset, ["move"], [move], [getDtype(move)])
				rewriteDataset(f, path, compression="gzip", compression_opts=9, dataset=dataset)
				# rewrite eventdetection too - start is also way too big here
				eventDetectionPath = findDatasets(f, "all", entry_point=f[path].parent.parent.attrs.get("event_detection"))[0]
				if "picopore.start_index" not in f[eventDetectionPath].attrs.keys():
					eventData = indexToZero(f, eventDetectionPath, "start")
					rewriteDataset(f, eventDetectionPath, compression="gzip", compression_opts=9, dataset=eventData)
				
	if __basegroup_name__ not in f:
		f.create_group(__basegroup_name__)
		for name, group in f.items():
			if name != __basegroup_name__:
				recursiveCollapseGroups(f, __basegroup_name__, name, group)
	return losslessCompress(f, group)
Example no. 12
    def __init__(self, x, y, mass_min=0.05, mode=sdutil.BINARY):
        ''' init

        '''
        x = recfunctions.drop_fields(x, "scenario_id", asrecarray=True)

        self.x = x
        self.y = y
        self.mass_min = mass_min
        self.mode = mode

        # we need to transform the structured array to a ndarray
        # we use dummy variables for each category in case of categorical
        # variables. Integers are treated as floats
        self.feature_names = []
        columns = []
        for unc, dtype in x.dtype.descr:
            dtype = x.dtype.fields[unc][0]
            if dtype == np.object:
                categories = sorted(list(set(x[unc])))
                for cat in categories:
                    label = '{}{}{}'.format(unc, self.sep, cat)
                    self.feature_names.append(label)
                    columns.append(x[unc] == cat)
            else:
                self.feature_names.append(unc)
                columns.append(x[unc])

        self._x = np.column_stack(columns)
        self._boxes = None
        self._stats = None
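The loop above turns a structured array into a plain 2-D feature matrix, expanding each object-dtype (categorical) column into boolean dummy columns. A small sketch of that expansion on a toy array (the column names and the '?' separator are made up for illustration):

import numpy as np

x = np.array([(0.1, 'a'), (0.5, 'b'), (0.9, 'a')],
             dtype=[('p', float), ('policy', object)])

feature_names, columns = [], []
for name in x.dtype.names:
    if x.dtype.fields[name][0] == np.dtype(object):
        # one boolean dummy column per category
        for cat in sorted(set(x[name])):
            feature_names.append('{}?{}'.format(name, cat))
            columns.append(x[name] == cat)
    else:
        feature_names.append(name)
        columns.append(x[name])

X = np.column_stack(columns)   # shape (3, 3): p, policy?a, policy?b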
def data_save(data, output_filename):
    # This isn't too hard, except we're going to put a copy of the
    # measures we actually care about at the beginning!
    names = list(data.dtype.names)
    
    # Drop every column that does not have '_av_' in its title,
    # or that contains '_mask'
    drop_names = [name for name in names if (name.find('_av_') == -1) | (name.find('_mask') > 0)]
    drop_names.pop(0)

    important_data = rec.drop_fields(data, drop_names, usemask=False, asrecarray=True)
    
    names = list(important_data.dtype.names)
    
    # Strip the beginning part to get shorter and easy to manage variable names
    names[1:] = [ name[6:] for name in names[1:] ]
    names[1:] = [ name[:(-8)] for name in names[1:] ]
    names[1:] = [ name[0].upper() + name[1:] + 'Cort' for name in names[1:] ]
    names[0] = 'SubID'
    important_data.dtype.names = names

    # Create two temporaray output_filenames:
    temp_filename1 = output_filename + '_temp1'
    temp_filename2 = output_filename + '_temp2'
    
    plt.rec2csv(data, temp_filename1, delimiter='\t', formatd=None, withheader=True)
    plt.rec2csv(important_data, temp_filename2, delimiter='\t', formatd=None, withheader=True)
    
    mcf.KW_paste(temp_filename2, temp_filename1, output_filename)
    mcf.KW_rmforce(temp_filename1)
    mcf.KW_rmforce(temp_filename2)
Example no. 14
def produce_trial(
    analysis: Analysis,
    flux_norm: float = 0,
    random_seed: Optional[int] = None,
    n_signal_observed: Optional[int] = None,
    verbose: bool = False,
    **kwargs,
) -> np.ndarray:
    """Produces a single trial of background+signal events based on inputs.

    Args:
        analysis:
        flux_norm: A flux normalization to adjust weights.
        random_seed: A seed value for the numpy RNG.
        n_signal_observed:
        verbose: A flag to print progress.

    Returns:
        An array of combined signal and background events.
    """
    # kwargs no-op
    len(kwargs)

    if random_seed is not None:
        np.random.seed(random_seed)

    background = analysis.model.inject_background_events()
    background['time'] = analysis.model.scramble_times(background['time'])

    if flux_norm > 0 or n_signal_observed is not None:
        signal = analysis.model.inject_signal_events(
            flux_norm,
            n_signal_observed,
        )

        signal['time'] = analysis.model.scramble_times(
            signal['time'],
            background=False,
        )

    else:
        signal = np.empty(0, dtype=background.dtype)

    if verbose:
        print(f'number of background events: {len(background)}')
        print(f'number of signal events: {len(signal)}')

    # Because we want to return the entire event and not just the
    # number of events, we need to do some numpy magic. Specifically,
    # we need to remove the fields in the simulated events that are
    # not present in the data events. These include the true direction,
    # energy, and 'oneweight'.
    signal = rf.drop_fields(
        signal,
        [n for n in signal.dtype.names if n not in background.dtype.names])

    # Combine the signal background events and time-sort them.
    # Use recfunctions.stack_arrays to prevent numpy from scrambling entry order
    events = rf.stack_arrays([background, signal], autoconvert=True)
    return events
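The comment block above describes the one piece of numpy bookkeeping this function needs: simulated signal events carry extra Monte Carlo fields, so they are dropped before the two arrays are stacked. The same trick in isolation, on toy arrays (the field names here are illustrative):

import numpy as np
from numpy.lib import recfunctions as rf

background = np.array([(2.5, 54000.0)],
                      dtype=[('logE', float), ('time', float)])
signal = np.array([(3.1, 54001.0, 0.7)],
                  dtype=[('logE', float), ('time', float), ('oneweight', float)])

# Drop the fields only the simulation carries ...
signal = rf.drop_fields(
    signal, [n for n in signal.dtype.names if n not in background.dtype.names])

# ... so both arrays share a dtype and can be stacked without scrambling entry order.
events = rf.stack_arrays([background, signal], autoconvert=True)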
Example no. 15
def format_data(outcomes, experiments, var):
    x = experiments.astype(float)
    y = outcomes.ix[:, var].values
    x = x.to_records()
    x = recfunctions.drop_fields(x, 'index')
    results = (x, {'y': y})
    return results
Example no. 16
def _prepare_experiments(experiments):
    '''
    transform the experiments structured array into a numpy array.

    Parameters
    ----------
    experiments : structured array
    
    Returns
    -------
    ndarray
    
    '''
    experiments = recfunctions.drop_fields(experiments, "scenario_id", 
                                           asrecarray=True)
    uncs = recfunctions.get_names(experiments.dtype)

    temp_experiments = np.zeros((experiments.shape[0], len(uncs)))
    
    for i, u in enumerate(uncs):
        try: 
            temp_experiments[:,i] = experiments[u].astype(np.float)
        except ValueError:
            
            data = experiments[u]
            entries = sorted(list(set(data)))
            
            for j, entry in enumerate(entries):
                temp_experiments[data==entry,i] = j
    
    return temp_experiments, uncs
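The try/except in the loop above falls back to integer category codes when a column cannot be cast to float. A toy invocation of the same logic (assumes only numpy; the column names are made up):

import numpy as np

experiments = np.array([(0.2, 'low'), (0.4, 'high'), (0.9, 'low')],
                       dtype=[('rate', float), ('policy', 'U4')])

out = np.zeros((experiments.shape[0], len(experiments.dtype.names)))
for i, name in enumerate(experiments.dtype.names):
    try:
        out[:, i] = experiments[name].astype(float)
    except ValueError:
        # non-numeric column: map each distinct entry to an integer code
        for j, entry in enumerate(sorted(set(experiments[name]))):
            out[experiments[name] == entry, i] = j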
Example no. 17
    def __init__(self, x, y, mass_min=0.05, mode=sdutil.BINARY):
        ''' init

        '''
        x = recfunctions.drop_fields(x, "scenario_id", asrecarray=True)

        self.x = x
        self.y = y
        self.mass_min = mass_min
        self.mode = mode

        # we need to transform the structured array to a ndarray
        # we use dummy variables for each category in case of categorical
        # variables. Integers are treated as floats
        self.feature_names = []
        columns = []
        for unc, dtype in x.dtype.descr:
            dtype = x.dtype.fields[unc][0]
            if dtype == np.object:
                categories = sorted(list(set(x[unc])))
                for cat in categories:
                    label = '{}{}{}'.format(unc, self.sep, cat)
                    self.feature_names.append(label)
                    columns.append(x[unc] == cat)
            else:
                self.feature_names.append(unc)
                columns.append(x[unc])

        self._x = np.column_stack(columns)
        self._boxes = None
        self._stats = None
    def build_signal_TS(self,
                        signal_trials=200,
                        result=False,
                        result_file=None):
        r'''Build the signal TS distribution.

        args:
        signal_trials: number of trials
        result: whether to store the full result in self.result. Default is False.
        result_file: file in which to store the full result. Default is None.

        return:
        TS: the TS array
        '''
        TS = []
        ts_result = []
        for i in range(signal_trials):
            data = self.draw_data()
            signal = self.draw_signal()
            signal = rf.drop_fields(
                signal,
                [n for n in signal.dtype.names if n not in data.dtype.names])
            self.point_source.update_data(np.concatenate([data, signal]))
            TS.append(self.point_source.eval_llh_fit_ns()[1])
            ts_result.append(self.point_source.get_fit_result)
        if result:
            np.save(result_file, np.array(ts_result))
        return np.array(TS)
Example no. 19
def rotate_struct(ev, ra, dec):
    r"""Wrapper around the rotate-method in skylab.utils for structured
    arrays.

    Parameters
    ----------
    ev : structured array
        Event information with ra, sinDec, plus true information

    ra, dec : float
        Coordinates to rotate the true direction onto

    Returns
    --------
    ev : structured array
        Array with rotated value, true information is deleted

    """
    names = ev.dtype.names

    rot = np.copy(ev)

    # Function call
    rot["ra"], rot_dec = rotate(ev["trueRa"], ev["trueDec"],
                                ra * np.ones(len(ev)), dec * np.ones(len(ev)),
                                ev["ra"], np.arcsin(ev["sinDec"]))

    if "dec" in names:
        rot["dec"] = rot_dec
    rot["sinDec"] = np.sin(rot_dec)

    # "delete" Monte Carlo information from sampled events
    mc = ["trueRa", "trueDec", "trueE", "ow"]

    return drop_fields(rot, mc)
Example no. 20
def mergeTimeColumns(data, years, months, days, hours):
    datetimes = []
    hours2 = []
    for idx in range(data.shape[0]):
        datetimes.append(
            datetime.datetime(years[idx], months[idx], days[idx], hours[idx]))
        hours2.append((datetimes[idx] - datetimes[0]).total_seconds() / 3600.0)

    data = recfunctions.append_fields(data,
                                      data=datetimes,
                                      names="datetimes",
                                      dtypes='M8[us]')
    data = recfunctions.append_fields(data,
                                      data=hours2,
                                      names="hours",
                                      dtypes='int64')

    data = recfunctions.drop_fields(data,
                                    drop_names=('year', 'month', 'day',
                                                'hour'))
    print "	Data set after merging time columns:"

    print "%-20s %-20s %-20s %-40s" % ("\tColumn Name", "Number of Elements",
                                       "Data Type", "Nulls found")
    for name in data.dtype.names:
        print "%-20s %-20s %-20s %-40s" % ("\t" + name, str(len(
            data[name])), data[name].dtype, "and TODO number of nulls")

    return data
Example no. 21
def appendFieldsToRecarray(recarray, data, fieldnames):
    """
    Return recarray with new fields appended, will override if exists.

    :param recarray: Recarray to append to
    :type  recarray: list
    :param data: Data
    :type  data: list or np.array
    :param fieldnames: Names of new column in numpy.recarray
    :type  fieldnames: list of str
    :return: Recarray with new field appended
    :rtype: numpy.recarray
    """
    from numpy.lib.recfunctions import append_fields, drop_fields
    if isinstance(data, list):
        if recarray.size != len(data[0]):
            print("Warning: Cannot append array of size " + str(len(data)) +
                  " to recarray of size " + str(recarray.size))
            return recarray
    else:
        if recarray.size != data.size:
            print("Warning: Cannot append array of size " + str(data.size) +
                  " to recarray of size " + str(recarray.size))
            return recarray
    rec = drop_fields(recarray, fieldnames)
    dtypes = ['f4'] * len(fieldnames)
    rec = append_fields(rec,
                        np.array(fieldnames),
                        data,
                        dtypes=dtypes,
                        asrecarray=True,
                        usemask=False)
    return rec
Example no. 22
def appendFieldToRecarray(recarray, data, fieldname):
    """
    Return recarray with new field appended, will override if exists.

    :param recarray: Recarray to append to
    :type  recarray: list
    :param data: Data
    :type  data: list
    :param fieldname: Name of new column in numpy.recarray
    :type  fieldname: str
    :return: Recarray with new field appended
    :rtype: numpy.recarray
    """
    from numpy.lib.recfunctions import append_fields, drop_fields
    if recarray.size != data.size:
        printWarning("Cannot append array of size " + str(data.size) +
                     " to recarray of size " + str(recarray.size))
        return recarray
    rec = drop_fields(recarray, fieldname)
    rec = append_fields(rec,
                        fieldname,
                        data,
                        dtypes='f4',
                        asrecarray=True,
                        usemask=False)
    return rec
Example no. 23
    def _rotate_subset(self, value, orig_experiments, logical): 
        '''
        rotate a subset
        
        Parameters
        ----------
        value : list of strings
        orig_experiment : numpy structured array
        logical : boolean array
        
        '''
        list_dtypes = [(name, "<f8") for name in value]
        
        #cast everything to float
        drop_names = set(rf.get_names(orig_experiments.dtype)) - set(value)
        orig_subset = rf.drop_fields(orig_experiments, drop_names, 
                                               asrecarray=True)
        subset_experiments = orig_subset.astype(list_dtypes).view('<f8').reshape(orig_experiments.shape[0], len(value))
 
        #normalize the data
        mean = np.mean(subset_experiments,axis=0)
        std = np.std(subset_experiments, axis=0)
        std[std==0] = 1 #in order to avoid a devision by zero
        subset_experiments = (subset_experiments - mean)/std
        
        #get the experiments of interest
        experiments_of_interest = subset_experiments[logical]
        
        #determine the rotation
        rotation_matrix =  self._determine_rotation(experiments_of_interest)
        
        #apply the rotation
        subset_experiments = np.dot(subset_experiments,rotation_matrix)
        return rotation_matrix, subset_experiments
Example no. 24
def merge_cort(data, cortisol_filename):
    
    cort_data = np.genfromtxt(cortisol_filename, dtype=None, names=True, delimiter='\t')
    
    names = list(cort_data.dtype.names)
    
    # Find all the columns in cort_data that have 'av' in their title
    # and not '_mask'
    drop_names = names[8:]

    cort_data = nprf.drop_fields(cort_data, drop_names, usemask=False, asrecarray=True)
    
    data = nprf.join_by('SubID', data, cort_data, jointype='leftouter',
                            r1postfix='KW', r2postfix='KW2', usemask=False,asrecarray=True)
    
    # Bizarrely, the join_by function pads with the biggest numbers it can think of!
    # So we're going to replace everything over 999 with 999
    for name in names[1:8]:
        data[name][data[name]>999] = 999
    
    # Define a UsableCort field: 1 if ANY of the cortisol values are not 999
    cort_array = np.vstack([data[name] for name in names[1:8]])
    usable_cort_array = np.zeros(cort_array.shape[1])
    usable_cort_array[np.any(cort_array != 999, axis=0)] = 1
    
    data = nprf.append_fields(base = data, names='UsableCort', data = usable_cort_array, usemask=False)

    return data
def DoJoin(balrog, row, size, odir, zz, names, end=None, cols=False, field=None):
    if not os.path.exists(odir):
        os.makedirs(odir)

    if end is None:
        end = row + len(zz)

    if end > size:
        end = size
    ee = end - row

    b = balrog[-1].read(rows=np.arange(row,end))
    d = []
    for name in names:
        d.append(zz[name][:ee])

    n = list(names)
    n.append('field')
    d.append(np.array([field]*len(b)))

    c = rec.append_fields(b, n, d)
    if 'table' in c.dtype.names:
        c = rec.drop_fields(c, 'table')

    ofile = os.path.join(odir, '%i-%i.fits'%(row,end))
    esutil.io.write(ofile, c, clobber=True)
    
    if cols:
        return end, c.dtype.names
    else:
        return end
Example no. 26
    def __init__(self, filename, date_sep='-', time_sep=':', format='stroke_DC3'):
        """ Load NLDN data from a file, into a numpy named array stored in the
            *data* attribute. *data*['time'] is relative to the *basedate* datetime
            attribute
            """
        self.format = format

        dtype_specs = getattr(self, format)

        nldn_initial = np.genfromtxt(filename, dtype=dtype_specs['columns'])
        date_part = np.genfromtxt(nldn_initial['date'],
                                  delimiter=date_sep, dtype=dtype_specs['date_dtype'])
        time_part = np.genfromtxt(nldn_initial['time'],
                                  delimiter=time_sep, dtype=dtype_specs['time_dtype'])
        dates = [datetime(a['year'], a['month'], a['day'], b['hour'], b['minute'])
                 for a, b in zip(date_part, time_part)]
        min_date = min(dates)
        min_date = datetime(min_date.year, min_date.month, min_date.day)
        t = np.fromiter(((d - min_date).total_seconds() for d in dates), dtype='float64')
        t += time_part['second']

        self.basedate = min_date
        data = drop_fields(nldn_initial, ('date', 'time'))
        data = append_fields(data, 'time', t)

        self.data = data
Example no. 27
    def __init__(self,
                 filename,
                 date_sep='-',
                 time_sep=':',
                 format='stroke_DC3'):
        """ Load NLDN data from a file, into a numpy named array stored in the
            *data* attribute. *data*['time'] is relative to the *basedate* datetime
            attribute
            """
        self.format = format

        dtype_specs = getattr(self, format)

        nldn_initial = np.genfromtxt(filename, dtype=dtype_specs['columns'])
        date_part = np.genfromtxt(nldn_initial['date'],
                                  delimiter=date_sep,
                                  dtype=dtype_specs['date_dtype'])
        time_part = np.genfromtxt(nldn_initial['time'],
                                  delimiter=time_sep,
                                  dtype=dtype_specs['time_dtype'])
        dates = [
            datetime(a['year'], a['month'], a['day'], b['hour'], b['minute'])
            for a, b in zip(date_part, time_part)
        ]
        min_date = min(dates)
        min_date = datetime(min_date.year, min_date.month, min_date.day)
        t = np.fromiter(((d - min_date).total_seconds() for d in dates),
                        dtype='float64')
        t += time_part['second']

        self.basedate = min_date
        data = drop_fields(nldn_initial, ('date', 'time'))
        data = append_fields(data, 'time', t)

        self.data = data
Example no. 28
def rotate_struct(ev, ra, dec):
    r"""Wrapper around the rotate-method in skylab.utils for structured
    arrays.

    Parameters
    ----------
    ev : structured array
        Event information with ra, sinDec, plus true information

    ra, dec : float
        Coordinates to rotate the true direction onto

    Returns
    --------
    ev : structured array
        Array with rotated value, true information is deleted

    """
    names = ev.dtype.names

    rot = np.copy(ev)

    # Function call
    rot["ra"], rot_dec = rotate(ev["trueRa"], ev["trueDec"],
                                ra * np.ones(len(ev)), dec * np.ones(len(ev)),
                                ev["ra"], np.arcsin(ev["sinDec"]))

    if "dec" in names:
        rot["dec"] = rot_dec
    rot["sinDec"] = np.sin(rot_dec)

    # "delete" Monte Carlo information from sampled events
    mc = ["trueRa", "trueDec", "trueE", "ow"]

    return drop_fields(rot, mc)
Example no. 29
def read_positions():
    head,points1 = csv_parse.read('../Baltay-fibers_random.csv',delimiter=' ')
    head,points2 = csv_parse.read('../Baltay-fibers_residual.csv',delimiter=' ')
    points2 = rec.drop_fields(points2,('r','theta'))
    points2['Number'] += 10000 # to distinguish them from the "randoms"
    points = np.hstack((points1,points2))
    return points
Example no. 30
    def remove_columns(self, col_names=None):
        '''
        This function will remove all the columns whose names are in
        col_names from all the datasets in self.columnar_data.

        Parameters
        ----------
        col_names : string or list
            The name or names of columns to be removed

        '''
        
        if col_names is not None:

            if type(col_names) == str:
                col_names = [col_names]
            else:
                col_names = list(col_names)

            # Format column names
            col_names = ff.format_headers(col_names)

            removed_data = []
            for data in self.columnar_data:
                removed_data.append(drop_fields(data, col_names))
            self.columnar_data = removed_data
def read_originals():
    """Return the originally defined fiber positions, sorted by x and y."""
    head, points1 = csv_parse.read("../Baltay-fibers_random.csv", delimiter=" ")
    head, points2 = csv_parse.read("../Baltay-fibers_residual.csv", delimiter=" ")
    points2 = recfunc.drop_fields(points2, ("r", "theta"))
    points2["Number"] += 10000  # to distinguish them from the "randoms"
    return np.hstack((points1, points2))
Example no. 32
def deepLosslessDecompress(f, group):
	# rebuild group hierarchy
	if __basegroup_name__ in f.keys():
		uncollapseGroups(f, f[__basegroup_name__])	
	paths = findDatasets(f, group)
	paths = [path for path in paths if "Basecall" in path]
	sampleRate = f["UniqueGlobalKey/channel_id"].attrs["sampling_rate"]
	for path in paths:
		if f[path].parent.parent.attrs.__contains__("event_detection"):
			# index back to event detection
			dataset = f[path].value
			if "mean" not in dataset.dtype.names:
				eventDetectionPath = findDatasets(f, "all", entry_point=f[path].parent.parent.attrs.get("event_detection"))[0]
				eventData = f[eventDetectionPath].value
				try:
					start = eventData["start"] + f[eventDetectionPath].attrs["picopore.start_index"]
					del f[eventDetectionPath].attrs["picopore.start_index"]
					eventData = drop_fields(eventData, ["start"])
					eventData = append_fields(eventData, ["start"], [start], [getDtype(start)])
					rewriteDataset(f, eventDetectionPath, compression="gzip", compression_opts=1, dataset=eventData)
				except KeyError:
					# must have been compressed without start indexing
					pass
				try:
					start_index = f[path].attrs["picopore.start_index"]
					del f[path].attrs["picopore.start_index"]
				except KeyError:
					# must have been compressed without start indexing
					start_index=0
				start = dataset["start"][0] + start_index
				end = dataset["start"][-1] + start_index
				# constrain to range in basecall
				eventData = eventData[np.logical_and(eventData["start"] >= start, eventData["start"] <= end)]
				# remove missing events
				i=0
				keepIndex = []
				for time in dataset["start"]:
					while eventData["start"][i] != time + start_index and i < eventData.shape[0]:
						i += 1
					keepIndex.append(i)
				eventData = eventData[keepIndex]
				dataset = drop_fields(dataset, "start")
				start = [i/sampleRate for i in eventData["start"]]
				length = [i/sampleRate for i in eventData["length"]]
				dataset = append_fields(dataset, ["mean", "start", "stdv", "length"], [eventData["mean"], start, eventData["stdv"], length])	
				rewriteDataset(f, path, dataset=dataset)
	return losslessDecompress(f, group)
Example no. 33
def load_arff(filename):
    data_struct = loadarff(filename)[0]
    # FIXME: field may not be named 'class'
    data_labels = data_struct['class']
    data = rfn.drop_fields(data_struct, 'class').view(
        np.float64).reshape(data_struct.shape + (-1, ))

    return data, data_labels
Example no. 34
  def __update(s):

    # Remove inactive channels
    names = s.records.dtype.names
    s.records = rcf.drop_fields(s.records, drop_names=s.inactive)
    s.chans = [s.chans[i] for i in xrange(len(names)) if names[i] not in s.inactive]

    s.__refresh_active()
Example no. 35
def create_basecall_1d_output(raw_events, scale, path, model, post=None):
    """Create the annotated event table and basecalling summaries similiar to chimaera.

    :param raw_events: :class:`np.ndarray` with fields mean, stdv, start and,
        length fields.
    :param scale: :class:`dragonet.basecall.scaling.Scaler` object (or object
        with attributes `shift`, `scale`, `drift`, `var`, `scale_sd`,
        and `var_sd`).
    :param path: list containing state indices with respect to `model`.
    :param model: `:class:dragonet.util.model.Model` object.
    :param post: Two-dimensional :class:`np.ndarray` containing posteriors (event, state).
    :param quality_data: :class:np.ndarray Array containing quality_data, used to annotate events.

    :returns: A tuple of:

        * the annotated input event table
        * a dict of result
    """

    events = raw_events.copy()
    model_state = np.array(map (lambda x: model[x]['kmer'], path))
    raw_model_level = np.array(map (lambda x: model[x]['level_mean'], path))
    move = np.array(list(kmer_overlap_gen(model_state)))
    counts = np.bincount(move)
    stays = counts[0]
    skips = counts[2] if len(counts) > 2 else 0

    # Extend the event table
    read_start = events[0]['start']
    model_level = scale.shift + scale.scale * raw_model_level +\
                  scale.drift * (events['start'] - read_start)
    new_columns = ['model_state', 'model_level', 'move']
    column_data = [model_state, model_level, move]

    if post is not None:
        weights = np.sum(post, axis=1)
        new_columns.append('weights')
        column_data.append(weights)

    drop_first = set(new_columns) & set(events.dtype.names)
    events = nprf.drop_fields(events, drop_first)
    table = nprf.append_fields(events, new_columns, data=column_data, asrecarray=True)

    # Compile the results
    results = {
        'num_events': events.size,
        'called_events': events.size,
        'shift': scale.shift,
        'scale': scale.scale,
        'drift': scale.drift,
        'var': scale.var,
        'scale_sd': scale.scale_sd,
        'var_sd': scale.var_sd,
        'num_stays': stays,
        'num_skips': skips
    }

    return table, results
Example no. 36
def convert(ifile):
    folder = "/lustre/scratch/astro/cs390/LGalaxies_Hen15_PublicRelease/MergerTrees/MR/treedata/"
    lastsnap = 63
    alistfile = "/lustre/scratch/astro/cs390/LGalaxies_Hen15_PublicRelease/input/zlists/zlist_MR.txt"
    f = h5py.File(folder+'/trees_'+str(ifile)+".hdf5", 'w')
    # Version
    f.attrs.create('Version', 0, dtype=numpy.int32)
    # Subversion
    f.attrs.create('Subversion', 1, dtype=numpy.int32)
    # Title
    f.attrs.create('Title', "The Mighty Peter")
    # Description
    f.attrs.create('Description', "This is for testing")
    # BoxsizeMpc -- I'm not convinced that we should use Mpc instead of Mpc/h (it's quite difficult to remember)
    # so I will use Mpc/h to avoid errors on my part
    f.attrs.create('BoxsizeMpc_h', 62.5, dtype=numpy.float32)
    # OmegaBaryon
    f.attrs.create('OmegaBaryon', 0.044, dtype=numpy.float32)
    # OmegaCDM
    f.attrs.create('OmegaCDM', 0.27-0.044, dtype=numpy.float32)
    # H100
    f.attrs.create('H100', 0.704, dtype=numpy.float32)
    # Sigma8
    f.attrs.create('Sigma8', 0.807, dtype=numpy.float32)
    
    #Group -- Snapshot
    snapshot_grp = f.create_group("Snapshots")
    (nsnaps,snapshot_data) = load_snapshot(alistfile)
    #NSnap
    print numpy.int32(nsnaps)
    snapshot_grp.attrs['NSnap'] = numpy.int32(nsnaps)
    #Snap
    snapshot_snap = snapshot_grp.create_dataset('Snap', data=snapshot_data)

    #Group -- MergerTrees
    mergertree_grp = f.create_group("MergerTrees")
    verbose = 1
    print "Reading tree",ifile
    (nTrees,nHalos,nTreeHalos,output_Halos,output_HaloIDs) = read_lgal_input_fulltrees_withids(folder,lastsnap,ifile,verbose)
    print "Done reading tree",ifile
    #TableFlag
    mergertree_grp.attrs['TableFlag'] = numpy.int32(1)
    #NTree
    mergertree_grp.attrs['NTrees'] = numpy.int32(nTrees)
    #NHalo
    mergertree_grp.attrs['NHalos'] = numpy.int32(nHalos)
    #NHalosInTree

    nhalosintree_data = mergertree_grp.create_dataset('NHalosInTree', data=nTreeHalos.astype(numpy.int32))
    #Halo
    print "Merging arrays"
    #halo = rfn.merge_arrays((output_Halos,output_HaloIDs), flatten = True, usemask = False)
    halo = join_struct_arrays((output_Halos,output_HaloIDs))
    print "Done merging arrays"
    halo = rfn.drop_fields(halo,['dummy','PeanoKey'])
    print "Outputting merger trees"
    nhalosintree_data = mergertree_grp.create_dataset('Halo', data=halo)
    print "Done"
Example no. 37
def plot_cdfs(x, y, ccdf=False):
    '''plot cumulative distribution functions for each column in x, based on
    the classification specified in y.

    Parameters
    ----------
    x : recarray
        the experiments to use in the cdfs
    y : ndarray
        the categorization for the data
    ccdf : bool, optional
           if true, plot a complementary cdf instead of a normal cdf.
           
           
    Returns
    -------
    a matplotlib Figure instance

    '''
    x = rf.drop_fields(x, "scenario_id", asrecarray=True)
    uncs = rf.get_names(x.dtype)
    cp = sns.color_palette()

    n_col = 4
    n_row = math.ceil(len(uncs) / n_col)
    size = 3
    aspect = 1
    figsize = n_col * size * aspect, n_row * size
    fig, axes = plt.subplots(n_row, n_col, figsize=figsize, squeeze=False)

    for i, unc in enumerate(uncs):
        discrete = False

        i_col = i % n_col
        i_row = i // n_col
        ax = axes[i_row, i_col]

        data = x[unc]
        if x.dtype[unc] == np.dtype('O'):
            discrete = True
        plot_individual_cdf(ax, unc, data, y, discrete, ccdf=ccdf)

    # last row might contain empty axis,
    # let's make them disappear
    for j_col in range(i_col + 1, n_col):
        ax = axes[i_row, j_col]
        ax.set_xticklabels([])
        ax.set_xticks([])
        ax.set_yticklabels([])
        ax.set_yticks([])

        sns.despine(ax=ax, top=True, right=True, left=True, bottom=True)

    proxies, labels = build_legend(x, y)

    fig.legend(proxies, labels, "upper center")

    return fig
Example no. 38
    def subj_by_subj_map_init(self, runs=2, verbose=-1, **map_kwargs):
        """
        initializing nodes by finding the MAP for each subject separately
        Input:
            runs - number of MAP runs for each subject
            map_kwargs - other arguments that will be passed on to the map function

        Note: This function should be run prior to the nodes creation, i.e.
        before running mcmc() or map()
        """

        # check if nodes were created. if they were, it causes problems for deepcopy
        assert (not self.nodes), "function should be used before nodes are initialized."

        # init
        subjs = self._subjs
        n_subjs = len(subjs)

        empty_s_model = deepcopy(self)
        empty_s_model.is_group_model = False
        del empty_s_model._num_subjs, empty_s_model._subjs, empty_s_model.data

        self.create_nodes()

        # loop over subjects
        for i_subj in range(n_subjs):
            # create and fit single subject
            if verbose > 1: print "*!*!* fitting subject %d *!*!*" % subjs[i_subj]
            t_data = self.data[self.data['subj_idx'] == subjs[i_subj]]
            t_data = rec.drop_fields(t_data, ['data_idx'])
            s_model = deepcopy(empty_s_model)
            s_model.data = t_data
            s_model.map(method='fmin_powell', runs=runs, **map_kwargs)

            # copy to original model
            for (name, node) in s_model.group_nodes.iteritems():
                self.subj_nodes[name][i_subj].value = node.value

        #set group and var nodes
        for (param_name, d) in self.params_dict.iteritems():
            for (tag, nodes) in d.subj_nodes.iteritems():
                subj_values = [x.value for x in nodes]
                #set group node
                if d.group_nodes:
                    d.group_nodes[tag].value = np.mean(subj_values)
                #set var node
                if d.var_nodes:
                    if d.var_type == 'std':
                        d.var_nodes[tag].value = np.std(subj_values)
                    elif d.var_type == 'precision':
                        d.var_nodes[tag].value = np.std(subj_values)**-2
                    elif d.var_type == 'sample_size':
                        v = np.var(subj_values)
                        m = np.mean(subj_values)
                        d.var_nodes[tag].value = (m * (1 - m)) / v - 1
                    else:
                        raise ValueError, "unknown var_type"
Example no. 39
def plot_cdfs(x, y, ccdf=False):
    '''plot cumulative distribution functions for each column in x, based on
    the classification specified in y.
    
    Parameters
    ----------
    x : recarray
        the experiments to use in the cdfs
    y : ndarray
        the categorization for the data
    ccdf : bool, optional
           if true, plot a complementary cdf instead of a normal cdf.
    
    '''
    x = rf.drop_fields(x, "scenario_id", asrecarray=True)
    uncs = rf.get_names(x.dtype)
    cp = sns.color_palette()
    
    n_col = 4
    n_row = len(uncs)//n_col +1
    size = 3 
    aspect = 1
    figsize = n_col * size * aspect, n_row * size
    fig, axes = plt.subplots(n_row, n_col,
                             figsize=figsize,
                             squeeze=False)

    for i, unc in enumerate(uncs):
        discrete = False
        
        i_col = i % n_col
        i_row = i // n_col
        ax = axes[i_row, i_col]
        
        data = x[unc]
        if x.dtype[unc] == np.dtype('O'):
            discrete = True
        plot_cdf(ax, unc, data, y, discrete, ccdf=ccdf)
    
    # last row might contain empty axis, 
    # let's make them disappear
    i_row = len(uncs) // n_col
    i_col = len(uncs) % n_col
    for i_col in range(i_col, n_col):
        ax = axes[i_row, i_col]
        ax.set_xticklabels([])
        ax.set_xticks([])
        ax.set_yticklabels([])
        ax.set_yticks([])
        
        sns.despine(ax=ax, top=True, right=True, left=True, bottom=True)
    
    proxies, labels = build_legend(x, y)
    
    fig.legend(proxies, labels, "upper center")

    return fig
Example no. 40
def read_positions():
    head, points1 = csv_parse.read('../Baltay-fibers_random.csv',
                                   delimiter=' ')
    head, points2 = csv_parse.read('../Baltay-fibers_residual.csv',
                                   delimiter=' ')
    points2 = rec.drop_fields(points2, ('r', 'theta'))
    points2['Number'] += 10000  # to distinguish them from the "randoms"
    points = np.hstack((points1, points2))
    return points
Example no. 41
    def _convert_event_fields(self, read_events, sample_rate):
        """Convert event fields 'start' and 'length' from raw indices into times
        """
        # convert event fields 'start' and 'length' from raw indices into times
        for col in ['start', 'length']:
            times = read_events[col] / sample_rate
            read_events = drop_fields(read_events, col, usemask=False)
            read_events = append_fields(read_events, col, times, usemask=False)
        return read_events
Example no. 42
def read_array_info(entry):
	data = try_read(files.read_array_info, "array_info", entry.array_info)
	info = recfunctions.stack_arrays([
		build_detname(data.info.det_uid, entry),
		recfunctions.drop_fields(data.info, "det_uid"),
		])
	return dataset.DataSet([
		dataset.DataField("array_info",data),
		dataset.DataField("entry", entry)])
Example no. 43
    def drop_extra_columns(self):
        """Remove any optional columns from this CopyNumArray.

        Returns a new copy with only the core columns retained:
            log2 value, chromosome, start, end, bin name.
        """
        result = self.__class__(self.sample_id)
        result.data = rfn.drop_fields(self.data, self._xtra)
        return result
Example no. 44
    def drop_extra_columns(self):
        """Remove any optional columns from this CopyNumArray.

        Returns a new copy with only the core columns retained:
            log2 value, chromosome, start, end, bin name.
        """
        result = self.__class__(self.sample_id)
        result.data = rfn.drop_fields(self.data, self._xtra)
        return result
Example no. 45
    def _add_channel_states(self, fh, meta):
        """Add mux, channel and channel states to meta, with special handling of mux 0.

        For mux 0, find out what the last well was (mux 1-4) and look for all states since the mux was set to zero.
        If with_mux_changes is True, add a table of all mux-change entries with times, and mux-state values (non-enumerated
        values, thus allowing for the distinction between e.g. 1:common_voltage_1 and 6:unblock_voltage_1, which both enumerate to well_id 1).


        """
        mux = fh.get_mux(self.channel, time=meta['start_time'])
        times = meta['start_time'], meta['start_time'] + meta['duration']
        if mux == 0:  # find out what the last well was, and when it was set. Note the mux could still be zero,
            # if the well was off from the start of the run
            mux, mux_set_time = fh.get_mux(self.channel,
                                           time=meta['start_time'],
                                           wells_only=True,
                                           return_raw_index=True)
            mux_set_time = float(
                mux_set_time
            ) / fh.sample_rate  # convert from raw index to time
            # look for any channel states which might have caused mux to change to zero (e.g. saturated / multiple),
            # i.e. look in time window from mux change to end of read
            states = fh.get_states_in_window(
                self.channel,
                times=(mux_set_time, meta['start_time'] + meta['duration']))
        else:
            states = fh.get_states_in_window(self.channel, times=times)

        mux_changes = fh.get_mux_changes_in_window(self.channel, times=times)
        # ensure 'well_id' changes between rows of the mux_changes struct array
        mux_changes = get_changes(mux_changes, use_cols=('well_id', ))
        logger.debug('mux changes from {} to {}: {}'.format(
            times[0], times[1], mux_changes))
        change_times = mux_changes['approx_raw_index'] / fh.sample_rate
        mux_changes = drop_fields(mux_changes,
                                  'approx_raw_index',
                                  usemask=False)
        mux_changes = append_fields(mux_changes,
                                    'time',
                                    change_times,
                                    usemask=False)

        meta.update({
            'mux':
            mux,
            'states':
            states,
            'channel':
            self.channel,
            'mux_changes':
            mux_changes,
            'bias_voltage_changes':
            fh.get_bias_voltage_changes_in_window(times=times)
        })
Example no. 46
def read_targets_in_box(hpdirname, radecbox=[0., 360., -90., 90.],
                        columns=None):
    """Read in targets in an RA/Dec box.

    Parameters
    ----------
    hpdirname : :class:`str`
        Full path to either a directory containing targets that
        have been partitioned by HEALPixel (i.e. as made by
        `select_targets` with the `bundle_files` option). Or the
        name of a single file of targets.
    radecbox : :class:`list`, defaults to the entire sky
        4-entry list of coordinates [ramin, ramax, decmin, decmax]
        forming the edges of a box in RA/Dec (degrees).
    columns : :class:`list`, optional
        Only read in these target columns.

    Returns
    -------
    :class:`~numpy.ndarray`
        An array of targets in the passed RA/Dec box.
    """
    # ADM we'll need RA/Dec for final cuts, so ensure they're read.
    addedcols = []
    columnscopy = None
    if columns is not None:
        # ADM make a copy of columns, as it's a kwarg we'll modify.
        columnscopy = columns.copy()
        for radec in ["RA", "DEC"]:
            if radec not in columnscopy:
                columnscopy.append(radec)
                addedcols.append(radec)

    # ADM if a directory was passed, do fancy HEALPixel parsing...
    if os.path.isdir(hpdirname):
        # ADM approximate nside for area of passed box.
        nside = pixarea2nside(box_area(radecbox))

        # ADM HEALPixels that touch the box for that nside.
        pixlist = hp_in_box(nside, radecbox)

        # ADM read in targets in these HEALPixels.
        targets = read_targets_in_hp(hpdirname, nside, pixlist,
                                     columns=columnscopy)
    # ADM ...otherwise just read in the targets.
    else:
        targets = fitsio.read(hpdirname, columns=columnscopy)

    # ADM restrict only to targets in the requested RA/Dec box...
    ii = is_in_box(targets, radecbox)
    # ADM ...and remove RA/Dec columns if we added them.
    targets = rfn.drop_fields(targets[ii], addedcols)

    return targets
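A hedged sketch of the same add-cut-drop pattern on a toy targets array; the column values and the cut are illustrative, and no HEALPixel files are involved:

import numpy as np
from numpy.lib import recfunctions as rfn

targets = np.array([(10.0, 5.0, 101), (200.0, -45.0, 102)],
                   dtype=[('RA', '>f8'), ('DEC', '>f8'), ('TARGETID', '>i8')])
addedcols = ['RA', 'DEC']  # columns appended only so the box cut could be applied
inbox = (targets['RA'] < 180.0) & (targets['DEC'] > -60.0)

# apply the cut, then remove the helper columns before returning
trimmed = rfn.drop_fields(targets[inbox], addedcols)
print(trimmed.dtype.names)  # ('TARGETID',)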
Example no. 47
0
    def updateNames(self, rename: dict) -> None:
        datas = self.field("laserdata")
        for i in range(len(datas)):
            remove = [
                name for name in datas[i].dtype.names if name not in rename
            ]
            datas[i] = rfn.drop_fields(datas[i], remove, usemask=False)
            datas[i] = rfn.rename_fields(datas[i], rename)

        self.setField("laserdata", datas)
        self.setElidedNames(datas[0].dtype.names)
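A small self-contained sketch of the drop-then-rename step above; the field names and rename map are made up, not taken from real laser data:

import numpy as np
from numpy.lib import recfunctions as rfn

rename = {'P31': 'Ga69', 'P66': 'Zn66'}  # hypothetical rename map
data = np.zeros(3, dtype=[('P31', float), ('P66', float), ('Bkg', float)])

# keep only the fields mentioned in the map, then rename them
remove = [name for name in data.dtype.names if name not in rename]
data = rfn.drop_fields(data, remove, usemask=False)
data = rfn.rename_fields(data, rename)
print(data.dtype.names)  # ('Ga69', 'Zn66')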
Example no. 48
0
def VecAssoc2BalrogIndex(header, ndata, label, index_key='balrog_index'):
    pos = None
    for name in header.keys():
        if header[name] == index_key:
            pos = int(name[1:])
            break
    if pos is not None:
        if label != 'des':
            index = ndata['VECTOR_ASSOC'][:, pos]
            ndata = recfunctions.append_fields(ndata, index_key, index, usemask=False)
        ndata = recfunctions.drop_fields(ndata, 'VECTOR_ASSOC', usemask=False)
    return ndata
Example no. 49
0
def test_stack():
    rec = rnp.root2rec(load('test.root'))
    s = rnp.stack([rec, rec])
    assert_equal(s.shape[0], 2 * rec.shape[0])
    assert_equal(s.dtype.names, rec.dtype.names)
    s = rnp.stack([rec, rec], fields=['x', 'y'])
    assert_equal(s.shape[0], 2 * rec.shape[0])
    assert_equal(s.dtype.names, ('x', 'y'))
    # recs don't have identical fields
    rec2 = recfunctions.drop_fields(rec, ['i', 'x'])
    s = rnp.stack([rec, rec2])
    assert_equal(set(s.dtype.names), set(['y', 'z']))
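A plain-numpy sketch of the same idea, restricting to the fields the records share before stacking; this is not rnp.stack itself, just an illustration built from drop_fields and concatenate:

import numpy as np
from numpy.lib import recfunctions as rfn

rec = np.array([(1, 2.0, 3.0, 4.0)],
               dtype=[('i', 'i4'), ('x', 'f8'), ('y', 'f8'), ('z', 'f8')])
rec2 = rfn.drop_fields(rec, ['i', 'x'])  # only 'y' and 'z' remain

# keep only the fields both records share, then concatenate
extra = [n for n in rec.dtype.names if n not in rec2.dtype.names]
stacked = np.concatenate([rfn.drop_fields(rec, extra), rec2])
print(stacked.dtype.names, stacked.shape)  # ('y', 'z') (2,)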
Example no. 51
0
def classify_line(filename, classifier):
    """ Use `classifier` to classify data stored in `filename`

    Args:
      filename (str): filename of stored results
      classifier (sklearn classifier): pre-trained classifier

    """
    z = np.load(filename)
    rec = z['record']

    if rec.shape[0] == 0:
        logger.debug('No records in {f}. Continuing'.format(f=filename))
        return

    # Rescale intercept term
    coef = rec['coef'].copy()  # copy so we don't transform npz coef
    coef[:, 0, :] = (coef[:, 0, :] + coef[:, 1, :] *
                     ((rec['start'] + rec['end']) / 2.0)[:, np.newaxis])

    # Include RMSE for full X matrix
    newdim = (coef.shape[0], coef.shape[1] * coef.shape[2])
    X = np.hstack((coef.reshape(newdim), rec['rmse']))

    # Create output and classify
    classes = classifier.classes_
    classified = np.zeros(rec.shape[0], dtype=[
        ('class', 'u2'),
        ('class_proba', 'float32', classes.size)
    ])
    classified['class'] = classifier.predict(X)
    classified['class_proba'] = classifier.predict_proba(X)

    # Replace with new classification if exists, or add by merging
    if ('class' in rec.dtype.names and 'class_proba' in rec.dtype.names and
            rec['class_proba'].shape[1] == classes.size):
        rec['class'] = classified['class']
        rec['class_proba'] = classified['class_proba']
    else:
        # Drop incompatible classified results if needed
        # e.g., if the number of classes changed
        if 'class' in rec.dtype.names and 'class_proba' in rec.dtype.names:
            rec = nprfn.drop_fields(rec, ['class', 'class_proba'])
        rec = nprfn.merge_arrays((rec, classified), flatten=True)

    # Create dict for re-saving `npz` file (only way to append)
    out = {}
    for k, v in z.items():
        out[k] = v
    out['classes'] = classes
    out['record'] = rec

    np.savez(filename, **out)
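A toy version of the drop-then-merge branch above, assuming the stored record was classified with 3 classes and the new classifier has 4; the field names and sizes are made up:

import numpy as np
from numpy.lib import recfunctions as nprfn

rec = np.zeros(2, dtype=[('rmse', 'f8'), ('class', 'u2'),
                         ('class_proba', 'f4', (3,))])
classified = np.zeros(2, dtype=[('class', 'u2'), ('class_proba', 'f4', (4,))])

# old class fields are incompatible with the new number of classes, so drop
# them and merge in the freshly classified fields
rec = nprfn.drop_fields(rec, ['class', 'class_proba'])
rec = nprfn.merge_arrays((rec, classified), flatten=True)
print(rec.dtype.names)  # ('rmse', 'class', 'class_proba')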
Example no. 52
0
def assign_and_drop(sam_ev, inj_ra, inj_dec):
    r"""
    Assign sampled ra/dec positions and drop mc fields from
    injected sample. This replaces the rotate function in the
    PointSourceInjector class.
    """
    # Assign sampled locations from src map.
    sam_ev["ra"] = inj_ra
    sam_ev["dec"] = inj_dec
    # Drop MC fields from the injected events
    mc_names = ['ow', 'trueDec', 'trueE', 'trueRa']
    return drop_fields(sam_ev, mc_names)
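A tiny usage sketch with a made-up structured dtype for the injected events, assuming drop_fields has been imported at module level as in the other examples:

import numpy as np

sam_ev = np.zeros(3, dtype=[('ra', 'f8'), ('dec', 'f8'), ('ow', 'f8'),
                            ('trueDec', 'f8'), ('trueE', 'f8'), ('trueRa', 'f8')])
sam_ev = assign_and_drop(sam_ev, inj_ra=0.5, inj_dec=-0.1)
print(sam_ev.dtype.names)  # ('ra', 'dec')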
Example no. 54
0
    def split_up_data_by_field(self, split_columns=None):
        '''
        This function will take in the split_columns list and split the
        data into separate arrays based on the list. For example, if one were
        to pass in dbh1, dbh2, dbh3, three copies of the data would be
        made, each being identical except that each would only contain one of
        the instances of dbh. One could also pass [(dbh1, recr1), (dbh2, recr2),
        (dbh3, recr3)]. For each tuple, all other fields named in
        split_columns are excluded except the fields within that tuple.

        Parameters
        ----------
        split_columns : list
            a list of tuples specifying the columns by which to split the array

        Notes
        -----
        Saves the split array as self.columnar_data.

        '''
        # Note: if the caller passes a wrong column name, nothing is removed.
        # Should I error check for this?
        if split_columns is not None:
            # Check if split_columns is a list of strings. If so, change it
            # into a list of tuples
            split_columns = [(s,) if isinstance(s, str) else tuple(s)
                             for s in split_columns]

            # Format the names in each tuple
            split_columns = [tuple(ff.format_headers(nms))
                             for nms in split_columns]

            split_data = []
            given_col_names = []
            for tup in split_columns:
                for name in tup:
                    given_col_names.append(name)
            given_col_names = np.array(given_col_names)


            for data in self.columnar_data:
                for tup in split_columns:
                    ind = np.ones(len(given_col_names), dtype=bool)
                    for name in tup:
                        ind = np.bitwise_and((name != given_col_names), ind)
                    remove_names = given_col_names[ind]
                    split_data.append(drop_fields(data, list(remove_names)))
            self.columnar_data = split_data
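A toy illustration of the split described in the docstring, with made-up column names and skipping the header-formatting step:

import numpy as np
from numpy.lib.recfunctions import drop_fields

data = np.zeros(4, dtype=[('spp', 'U10'), ('dbh1', float), ('dbh2', float)])
split_columns = [('dbh1',), ('dbh2',)]
given = np.array([name for tup in split_columns for name in tup])

# for each tuple, drop every other column listed in split_columns
split_data = []
for tup in split_columns:
    keep = np.isin(given, tup)
    split_data.append(drop_fields(data, list(given[~keep])))

print([d.dtype.names for d in split_data])
# [('spp', 'dbh1'), ('spp', 'dbh2')]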
Example no. 55
0
    def drop_columns(self, colnames, **kwargs):
        """Drop  columns from the table.

        See the docs for ``numpy.lib.recfunctions.drop_fields`` for an
        explanation of the remaining options.
        """
        new_arr = rfn.drop_fields(
            self, colnames, usemask=False, asrecarray=True, **kwargs
        )
        return self.__class__(
            new_arr,
            h5loc=self.h5loc,
            split_h5=self.split_h5,
            name=self.name,
            h5singleton=self.h5singleton
        )
Example no. 56
0
def remove_cols(M, col_names):
    """Remove columns specified by col_names from structured array

    Parameters
    ----------
    M : numpy.ndarray
        structured array
    col_names : list of str
        names for columns to remove

    Returns
    -------
    numpy.ndarray
        structured array without columns
    """
    M, col_names = check_consistent(M, col_names=col_names)
    return nprf.drop_fields(M, col_names, usemask=False)
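A minimal illustration of the drop performed by remove_cols, with made-up columns; the consistency check from the real helper is skipped here:

import numpy as np
from numpy.lib import recfunctions as nprf

M = np.array([(1, 2.5, 3)], dtype=[('id', 'i4'), ('score', 'f8'), ('flag', 'i4')])
trimmed = nprf.drop_fields(M, ['flag'], usemask=False)
print(trimmed.dtype.names)  # ('id', 'score')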
def evaluate_population_outcome(population, ri, toolbox, ensemble):
    '''
    Helper function for evaluating a population in case of outcome optimization

    Parameters
    ----------
    population : list
        the population to evaluate
    ri : int
        reporting interval
    toolbox : deap toolbox instance
    ensemble : ModelEnsemble instance
        the ensemble instance running the optimization

    '''

    cases = [dict(member) for member in population]
    experiments, outcomes = ensemble.perform_experiments(cases,
                                                reporting_interval=ri)

    # TODO:: model and policy should stay in; this lets you also look across
    # policies and models for what the optimum is. So by default you have to
    # add all models and all policies to x and only then look up your index.
    # This does add two extra nested loops, though...
    
    experiments = recfunctions.drop_fields(experiments,
                                           drop_names=['model', 'policy'],
                                           asrecarray=True)
    ordering = [entry[0] for entry in experiments.dtype.descr]
    
    experiments = experiments.tolist()
    indices = {tuple(experiments[i]):i for i in range(len(experiments))}
    
    # we need to map the outcomes of the experiments back to the
    # correct individual
    for member in population:
        index = tuple([member[entry] for entry in ordering])
        associated_index = indices[index]
        
        member_outcomes = {}
        for key, value in outcomes.items():
            member_outcomes[key] = value[associated_index]
            
        member.fitness.values = toolbox.evaluate(member_outcomes)
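A hedged, self-contained sketch of the index bookkeeping above, with made-up experiment columns and a plain dictionary standing in for a DEAP individual:

import numpy as np
from numpy.lib import recfunctions

experiments = np.array([(0.1, 2, 'm1', 'p1'), (0.3, 5, 'm1', 'p1')],
                       dtype=[('a', 'f8'), ('b', 'i4'),
                              ('model', 'U8'), ('policy', 'U8')])

# drop the bookkeeping columns, then map each remaining row (as a tuple)
# back to its position, i.e. the row index of its outcomes
experiments = recfunctions.drop_fields(experiments,
                                       drop_names=['model', 'policy'],
                                       asrecarray=True)
ordering = [entry[0] for entry in experiments.dtype.descr]
rows = experiments.tolist()
indices = {tuple(rows[i]): i for i in range(len(rows))}

member = {'a': 0.3, 'b': 5}  # hypothetical population member
print(indices[tuple(member[entry] for entry in ordering)])  # 1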