Example #1
def root2panda(files_path, tree_name, mask=False, **kwargs):
    '''
    Args:
    -----
        files_path: a string like './data/*.root', for example
        tree_name: a string like 'Collection_Tree' corresponding to the name of the tree inside the ROOT
                   file that we want to open
        mask: a boolean; if True, always return the unmasked .data of the stacked array
        kwargs: arguments taken by root2rec, such as branches to consider, etc
    Returns:
    --------
        output_panda: a pandas DataFrame like allbkg_df in which all the info from the ROOT file will be stored

    Note:
    -----
        if you are working with .root files that contain different branches, you might have to mask your data;
        in that case, return pd.DataFrame(ss.data)
    '''
    
    files = glob.glob(files_path)

    # -- check whether a name was passed for the tree_name --> for root files with only one tree and no folders, 
    # -- you do not need to specify any name (I believe)
    if tree_name == '':
        ss = stack_arrays([root2rec(fpath, **kwargs) for fpath in files])
    else:
        ss = stack_arrays([root2rec(fpath, tree_name, **kwargs) for fpath in files])
    
    if mask:
        return pd.DataFrame(ss.data)
    else:
        try:
            return pd.DataFrame(ss)
        except Exception:
            return pd.DataFrame(ss.data)
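
A hypothetical usage sketch for the function above (the glob pattern, tree name, and branch list are illustrative, and glob, pandas as pd, and root_numpy's root2rec are assumed to be imported):

allbkg_df = root2panda('./data/*.root', 'Collection_Tree', branches=['jet_pt', 'jet_eta'])
print(allbkg_df.head())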
Example #2
    def test_unnamed_and_named_fields(self):
        # Test combination of arrays w/ & w/o named fields
        (_, x, _, z) = self.data

        test = stack_arrays((x, z))
        control = ma.array(
            [(1, -1, -1), (2, -1, -1), (-1, "A", 1), (-1, "B", 2)],
            mask=[(0, 1, 1), (0, 1, 1), (1, 0, 0), (1, 0, 0)],
            dtype=[("f0", int), ("A", "|S3"), ("B", float)],
        )
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)

        test = stack_arrays((z, x))
        control = ma.array(
            [("A", 1, -1), ("B", 2, -1), (-1, -1, 1), (-1, -1, 2)],
            mask=[(0, 0, 1), (0, 0, 1), (1, 1, 0), (1, 1, 0)],
            dtype=[("A", "|S3"), ("B", float), ("f2", int)],
        )
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)

        test = stack_arrays((z, z, x))
        control = ma.array(
            [("A", 1, -1), ("B", 2, -1), ("A", 1, -1), ("B", 2, -1), (-1, -1, 1), (-1, -1, 2)],
            mask=[(0, 0, 1), (0, 0, 1), (0, 0, 1), (0, 0, 1), (1, 1, 0), (1, 1, 0)],
            dtype=[("A", "|S3"), ("B", float), ("f2", int)],
        )
        assert_equal(test, control)
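
For context, this test comes from numpy's test_recfunctions.py; the self.data fixture it unpacks is built roughly like this (a sketch from memory of the numpy test suite, not part of the scraped example):

import numpy as np

x = np.array([1, 2])
y = np.array([10, 20, 30])
z = np.array([('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)])
w = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
             dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])
data = (w, x, y, z)  # the tests unpack this as (_, x, _, z)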
Example #3
def combine_datasets(dataset_list):
    """
    Definition:
    -----------
        Function that combines a list of datasets into a single dataset
        Each of the inputs (and the output) should have the form {"X":data, "y":recarray, "w":recarray}
        This allows us to combine datasets from different input files

    Args:
    -----
        dataset_list = array of dictionaries of the form {"X":data, "y":recarray, "w":recarray}

    Returns:
    --------
        dictionary of the form {"X":data, "y":recarray, "w":recarray} containing all input information
    """
    # -- y and w are 1D arrays which are simple to combine
    y_combined = stack_arrays([dataset["y"] for dataset in dataset_list], asrecarray=True, usemask=False)
    w_combined = stack_arrays([dataset["w"] for dataset in dataset_list], asrecarray=True, usemask=False)

    # print dataset_list[0]["X"].dtype

    # -- Construct the desired output shape using the known size of y_combined
    #    Necessary shape is (N_elements, N_categories)
    X_shape = (y_combined.shape[0], dataset_list[0]["X"].shape[1])

    # -- Stack X arrays and then reshape
    X_combined = stack_arrays([dataset["X"] for dataset in dataset_list], asrecarray=True, usemask=False)
    X_combined.resize(X_shape)

    # -- Recombine into a dictionary and return
    return {"X": X_combined, "y": y_combined, "w": w_combined}
Example #4
    def test_matching_named_fields(self):
        # Test combination of arrays w/ matching field names
        (_, x, _, z) = self.data
        zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)],
                      dtype=[('A', '|S3'), ('B', float), ('C', float)])
        test = stack_arrays((z, zz))
        control = ma.array([('A', 1, -1), ('B', 2, -1),
                            ('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)],
                           dtype=[('A', '|S3'), ('B', float), ('C', float)],
                           mask=[(0, 0, 1), (0, 0, 1),
                                 (0, 0, 0), (0, 0, 0), (0, 0, 0)])
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)

        test = stack_arrays((z, zz, x))
        ndtype = [('A', '|S3'), ('B', float), ('C', float), ('f3', int)]
        control = ma.array([('A', 1, -1, -1), ('B', 2, -1, -1),
                            ('a', 10., 100., -1), ('b', 20., 200., -1),
                            ('c', 30., 300., -1),
                            (-1, -1, -1, 1), (-1, -1, -1, 2)],
                           dtype=ndtype,
                           mask=[(0, 0, 1, 1), (0, 0, 1, 1),
                                 (0, 0, 0, 1), (0, 0, 0, 1), (0, 0, 0, 1),
                                 (1, 1, 1, 0), (1, 1, 1, 0)])
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)
Example #5
    def test_matching_named_fields(self):
        # Test combination of arrays w/ matching field names
        (_, x, _, z) = self.data
        zz = np.array(
            [("a", 10.0, 100.0), ("b", 20.0, 200.0), ("c", 30.0, 300.0)],
            dtype=[("A", "|S3"), ("B", float), ("C", float)],
        )
        test = stack_arrays((z, zz))
        control = ma.array(
            [("A", 1, -1), ("B", 2, -1), ("a", 10.0, 100.0), ("b", 20.0, 200.0), ("c", 30.0, 300.0)],
            dtype=[("A", "|S3"), ("B", float), ("C", float)],
            mask=[(0, 0, 1), (0, 0, 1), (0, 0, 0), (0, 0, 0), (0, 0, 0)],
        )
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)

        test = stack_arrays((z, zz, x))
        ndtype = [("A", "|S3"), ("B", float), ("C", float), ("f3", int)]
        control = ma.array(
            [
                ("A", 1, -1, -1),
                ("B", 2, -1, -1),
                ("a", 10.0, 100.0, -1),
                ("b", 20.0, 200.0, -1),
                ("c", 30.0, 300.0, -1),
                (-1, -1, -1, 1),
                (-1, -1, -1, 2),
            ],
            dtype=ndtype,
            mask=[(0, 0, 1, 1), (0, 0, 1, 1), (0, 0, 0, 1), (0, 0, 0, 1), (0, 0, 0, 1), (1, 1, 1, 0), (1, 1, 1, 0)],
        )
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)
Example #6
    def test_unnamed_and_named_fields(self):
        # Test combination of arrays w/ & w/o named fields
        (_, x, _, z) = self.data

        test = stack_arrays((x, z))
        control = ma.array([(1, -1, -1), (2, -1, -1),
                            (-1, 'A', 1), (-1, 'B', 2)],
                           mask=[(0, 1, 1), (0, 1, 1),
                                 (1, 0, 0), (1, 0, 0)],
                           dtype=[('f0', int), ('A', '|S3'), ('B', float)])
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)

        test = stack_arrays((z, x))
        control = ma.array([('A', 1, -1), ('B', 2, -1),
                            (-1, -1, 1), (-1, -1, 2), ],
                           mask=[(0, 0, 1), (0, 0, 1),
                                 (1, 1, 0), (1, 1, 0)],
                           dtype=[('A', '|S3'), ('B', float), ('f2', int)])
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)

        test = stack_arrays((z, z, x))
        control = ma.array([('A', 1, -1), ('B', 2, -1),
                            ('A', 1, -1), ('B', 2, -1),
                            (-1, -1, 1), (-1, -1, 2), ],
                           mask=[(0, 0, 1), (0, 0, 1),
                                 (0, 0, 1), (0, 0, 1),
                                 (1, 1, 0), (1, 1, 0)],
                           dtype=[('A', '|S3'), ('B', float), ('f2', int)])
        assert_equal(test, control)
Example #7
    def analyze_chamber_data(self,raw_chamber_data):
        ethanol_data = raw_chamber_data[raw_chamber_data['status']=='Ethanol']
        analyzed_ethanol_data = self.analyze_data(ethanol_data)
        status_array = numpy.array(['Ethanol']*len(analyzed_ethanol_data),dtype='|S25')
        analyzed_chamber_data = recfunctions.append_fields(analyzed_ethanol_data,
                                                           'status',
                                                           status_array,
                                                           dtypes='|S25',
                                                           usemask=False)

        air_before_data = raw_chamber_data[raw_chamber_data['status']=='AirBefore']
        if air_before_data.size != 0:
            analyzed_air_before_data = self.analyze_data(air_before_data)
            status_array = numpy.array(['AirBefore']*len(analyzed_air_before_data),dtype='|S25')
            analyzed_air_before_data = recfunctions.append_fields(analyzed_air_before_data,
                                                                  'status',
                                                                  status_array,
                                                                  dtypes='|S25',
                                                                  usemask=False)
            analyzed_chamber_data = recfunctions.stack_arrays((analyzed_air_before_data,analyzed_chamber_data),usemask=False)


        air_after_data = raw_chamber_data[raw_chamber_data['status']=='AirAfter']
        if air_after_data.size != 0:
            analyzed_air_after_data = self.analyze_data(air_after_data)
            status_array = numpy.array(['AirAfter']*len(analyzed_air_after_data),dtype='|S25')
            analyzed_air_after_data = recfunctions.append_fields(analyzed_air_after_data,
                                                                  'status',
                                                                  status_array,
                                                                  dtypes='|S25',
                                                                  usemask=False)
            analyzed_chamber_data = recfunctions.stack_arrays((analyzed_chamber_data,analyzed_air_after_data),usemask=False)

        return analyzed_chamber_data
Example #8
def load_data( data_path, branch_names, dataset_names, dataset_ranges = []):  
    """ Import data from several ROOT files to a recarray """
    l_raw_vars = []
    l_weight = []
    l_origin = []
    for i, d_name in enumerate(dataset_names):
        f_name =  "{}{}.root".format(data_path,d_name)
        if "BTagCSV" in d_name:
            d_weight = 1.
        else:
            d_weight = mc_samples[d_name]["xs"]/mc_samples[d_name]["gen_events"] 
        if len(dataset_ranges) == len(dataset_names): 
            l_raw_vars.append(root2array(f_name,"tree", branch_names,
                              stop=dataset_ranges[i]))
        else:    
            l_raw_vars.append(root2array(f_name,"tree", branch_names))
        n_ev = l_raw_vars[-1].shape[0]
        l_weight.append(np.full((n_ev),d_weight, 'f8'))
        l_origin.append(np.full((n_ev),d_name, 'a20'))
    raw_vars = stack_arrays(l_raw_vars, asrecarray=True, usemask=False)     
    weight = stack_arrays(l_weight, asrecarray=True, usemask=False)     
    origin = stack_arrays(l_origin, asrecarray=True, usemask=False)     
    raw_vars = append_fields(raw_vars, ["origin","weight"], [origin, weight],
                             asrecarray=True, usemask=False)
    return raw_vars
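
The origin/weight bookkeeping in load_data can be reproduced on toy data without any ROOT files; a minimal sketch (field names and values are illustrative):

import numpy as np
from numpy.lib.recfunctions import append_fields

events = np.array([(1.0,), (2.0,)], dtype=[('pt', 'f8')])
weight = np.full(len(events), 0.5, 'f8')
origin = np.full(len(events), 'QCD', 'a20')
tagged = append_fields(events, ['origin', 'weight'], [origin, weight],
                       asrecarray=True, usemask=False)
print(tagged.dtype.names)  # ('pt', 'origin', 'weight')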
Example #9
    def get_raw_chamber_data(self,filtered_data):
        # chamber_dtype = numpy.dtype([('time_secs', '<u4'),
        #                              ('time_nsecs', '<u4'),
        #                              ('time_rel', '<f4'),
        #                              ('status', '|S25'),
        #                              ('tunnel', '<u2'),
        #                              ('fly_x', '<f4'),
        #                              ('fly_y', '<f4'),
        #                              ('fly_angle', '<f4'),
        #                              ])
        header = list(FILE_TOOLS.chamber_dtype.names)
        tracking_chamber_data = filtered_data[filtered_data['status'] != 'Walk To End']
        tracking_chamber_data = tracking_chamber_data[header]
        tracking_chamber_data = tracking_chamber_data.astype(FILE_TOOLS.chamber_dtype)
        tracking_chamber_data['tunnel'] = tracking_chamber_data['tunnel']+1
        indicies = tracking_chamber_data['status'] == 'End Chamber Ethanol'
        raw_chamber_data_ethanol = tracking_chamber_data[indicies]
        raw_chamber_data_ethanol = recfunctions.drop_fields(raw_chamber_data_ethanol,
                                                            'status',
                                                            usemask=False)
        status_array = numpy.array(['Ethanol']*len(raw_chamber_data_ethanol),dtype='|S25')
        raw_chamber_data_ethanol = recfunctions.append_fields(raw_chamber_data_ethanol,
                                                              'status',
                                                              status_array,
                                                              dtypes='|S25',
                                                              usemask=False)
        raw_chamber_data = raw_chamber_data_ethanol

        ethanol_start_time = raw_chamber_data_ethanol['time_rel'][0]
        indicies = tracking_chamber_data['status'] == 'End Chamber Air'
        indicies &= tracking_chamber_data['time_rel'] < ethanol_start_time
        raw_chamber_data_air_before = tracking_chamber_data[indicies]
        raw_chamber_data_air_before = recfunctions.drop_fields(raw_chamber_data_air_before,
                                                               'status',
                                                               usemask=False)
        status_array = numpy.array(['AirBefore']*len(raw_chamber_data_air_before),dtype='|S25')
        raw_chamber_data_air_before = recfunctions.append_fields(raw_chamber_data_air_before,
                                                                 'status',
                                                                 status_array,
                                                                 dtypes='|S25',
                                                                 usemask=False)
        raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data_air_before,raw_chamber_data),usemask=False)

        indicies = tracking_chamber_data['status'] == 'End Chamber Air'
        indicies &= tracking_chamber_data['time_rel'] > ethanol_start_time
        raw_chamber_data_air_after = tracking_chamber_data[indicies]
        raw_chamber_data_air_after = recfunctions.drop_fields(raw_chamber_data_air_after,
                                                               'status',
                                                               usemask=False)
        status_array = numpy.array(['AirAfter']*len(raw_chamber_data_air_after),dtype='|S25')
        raw_chamber_data_air_after = recfunctions.append_fields(raw_chamber_data_air_after,
                                                                 'status',
                                                                 status_array,
                                                                 dtypes='|S25',
                                                                 usemask=False)
        raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data,raw_chamber_data_air_after),usemask=False)

        return raw_chamber_data
Example #10
    def test_solo(self):
        # Test stack_arrays on single arrays
        (_, x, _, _) = self.data
        test = stack_arrays((x,))
        assert_equal(test, x)
        self.assertTrue(test is x)

        test = stack_arrays(x)
        assert_equal(test, x)
        self.assertTrue(test is x)
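
As test_solo documents, stack_arrays is the identity on a single input array; a one-line check:

import numpy as np
from numpy.lib.recfunctions import stack_arrays

x = np.array([1, 2])
assert stack_arrays(x) is x and stack_arrays((x,)) is x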
Example #11
 def test_autoconversion(self):
     # Tests autoconversion
     adtype = [('A', int), ('B', bool), ('C', float)]
     a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype)
     bdtype = [('A', int), ('B', float), ('C', float)]
     b = ma.array([(4, 5, 6)], dtype=bdtype)
     control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)],
                        dtype=bdtype)
     test = stack_arrays((a, b), autoconvert=True)
     assert_equal(test, control)
     assert_equal(test.mask, control.mask)
     with assert_raises(TypeError):
         stack_arrays((a, b), autoconvert=False)
Example #12
    def test_unnamed_fields(self):
        # Tests combinations of arrays w/o named fields
        (_, x, y, _) = self.data

        test = stack_arrays((x, x), usemask=False)
        control = np.array([1, 2, 1, 2])
        assert_equal(test, control)

        test = stack_arrays((x, y), usemask=False)
        control = np.array([1, 2, 10, 20, 30])
        assert_equal(test, control)

        test = stack_arrays((y, x), usemask=False)
        control = np.array([10, 20, 30, 1, 2])
        assert_equal(test, control)
Example #13
    def test_unnamed_fields(self):
        # Tests combinations of arrays w/o named fields
        (_, x, y, _) = self.data

        test = stack_arrays((x, x), usemask=False)
        control = np.array([1, 2, 1, 2])
        assert_equal(test, control)

        test = stack_arrays((x, y), usemask=False)
        control = np.array([1, 2, 10, 20, 30])
        assert_equal(test, control)

        test = stack_arrays((y, x), usemask=False)
        control = np.array([10, 20, 30, 1, 2])
        assert_equal(test, control)
Example #14
def produce_trial(
    analysis: Analysis,
    flux_norm: float = 0,
    random_seed: Optional[int] = None,
    n_signal_observed: Optional[int] = None,
    verbose: bool = False,
    **kwargs,
) -> np.ndarray:
    """Produces a single trial of background+signal events based on inputs.

    Args:
        analysis: The Analysis object whose model injects and scrambles events.
        flux_norm: A flux normalization to adjust weights.
        random_seed: A seed value for the numpy RNG.
        n_signal_observed: A fixed number of signal events to inject.
        verbose: A flag to print progress.

    Returns:
        An array of combined signal and background events.
    """
    # kwargs no-op
    len(kwargs)

    if random_seed is not None:
        np.random.seed(random_seed)

    background = analysis.model.inject_background_events()
    background['time'] = analysis.model.scramble_times(background['time'])

    if flux_norm > 0 or n_signal_observed is not None:
        signal = analysis.model.inject_signal_events(
            flux_norm,
            n_signal_observed,
        )

        signal['time'] = analysis.model.scramble_times(
            signal['time'],
            background=False,
        )

    else:
        signal = np.empty(0, dtype=background.dtype)

    if verbose:
        print(f'number of background events: {len(background)}')
        print(f'number of signal events: {len(signal)}')

    # Because we want to return the entire event and not just the
    # number of events, we need to do some numpy magic. Specifically,
    # we need to remove the fields in the simulated events that are
    # not present in the data events. These include the true direction,
    # energy, and 'oneweight'.
    signal = rf.drop_fields(
        signal,
        [n for n in signal.dtype.names if n not in background.dtype.names])

    # Combine the signal background events and time-sort them.
    # Use recfunctions.stack_arrays to prevent numpy from scrambling entry order
    events = rf.stack_arrays([background, signal], autoconvert=True)
    return events
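
The drop-then-stack step at the end of produce_trial can be exercised in isolation; a self-contained sketch with toy dtypes (field names are illustrative):

import numpy as np
from numpy.lib import recfunctions as rf

background = np.zeros(3, dtype=[('time', 'f8'), ('energy', 'f8')])
signal = np.zeros(2, dtype=[('time', 'f8'), ('energy', 'f8'), ('oneweight', 'f8')])

# Drop simulation-only fields so both arrays share a dtype, then stack.
signal = rf.drop_fields(
    signal, [n for n in signal.dtype.names if n not in background.dtype.names])
events = rf.stack_arrays([background, signal], autoconvert=True)
print(len(events))  # 5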
Example #15
    def summarize_data(self,analyzed_data):
        initialized = False
        tunnels = set(analyzed_data['tunnel'])
        for tunnel in tunnels:
            tunnel_data_analyzed = analyzed_data[analyzed_data['tunnel']==tunnel]

            tunnel_array = numpy.ones(1,dtype=numpy.uint16)*tunnel
            tunnel_array.dtype = numpy.dtype([('tunnel','<u2')])
            tunnel_data_summarized = tunnel_array

            delta_time = tunnel_data_analyzed['delta_time']
            total_time = delta_time.sum()
            distance = tunnel_data_analyzed['distance']
            total_distance = distance.sum()
            velocity = tunnel_data_analyzed['velocity']
            mean_velocity = velocity.mean()
            angular_velocity = tunnel_data_analyzed['angular_velocity']
            mean_angular_velocity = angular_velocity.mean()

            names = ['total_time','total_distance','mean_velocity','mean_angular_velocity']
            tunnel_data_seq = [total_time,total_distance,mean_velocity,mean_angular_velocity]
            tunnel_data_summarized = recfunctions.append_fields(tunnel_data_summarized,
                                                                names,
                                                                tunnel_data_seq,
                                                                dtypes=numpy.float32,
                                                                usemask=False)
            if initialized:
                summarized_data = recfunctions.stack_arrays((summarized_data,tunnel_data_summarized),usemask=False)
            else:
                summarized_data = tunnel_data_summarized
                initialized = True

        return summarized_data
Example #16
    def produce_sample(self, producer_pipe, logLmin):
        """
        main loop that generates samples and puts them in the queue for the nested sampler object
        """

        if not self.initialised:
            self.reset()

        self.counter = 0

        while True:
            if logLmin.value == np.inf:
                break

            p = producer_pipe.recv()

            if p is None:
                break

            self.evolution_points.append(p)
            (acceptance, Nmcmc,
             outParam) = next(self.metropolis_hastings(logLmin.value))

            # Send the sample to the Nested Sampler
            producer_pipe.send((acceptance, Nmcmc, outParam))
            # Update the ensemble every now and again

            if (self.counter % (self.poolsize / 10)) == 0 or acceptance < 1.0 / float(self.poolsize):
                self.proposal.set_ensemble(self.evolution_points)
            self.counter += 1

        sys.stderr.write(
            "Sampler process {0!s}: MCMC samples accumulated = {1:d}\n".format(
                os.getpid(), len(self.samples)))
        thinning = int(np.ceil(np.mean(self.ACLs)))
        self.samples.extend(self.evolution_points)
        sys.stderr.write(
            "Sampler process {0!s}: Mean ACL measured (suggested thinning) = {1:d}\n"
            .format(os.getpid(), thinning))
        import numpy.lib.recfunctions as rfn
        self.mcmc_samples = rfn.stack_arrays(
            [self.samples[j].asnparray() for j in range(0, len(self.samples))],
            usemask=False)
        if self.verbose >= 3:
            np.savetxt(os.path.join(self.output,
                                    'mcmc_chain_%s.dat' % os.getpid()),
                       self.mcmc_samples.ravel(),
                       header=' '.join(self.mcmc_samples.dtype.names),
                       newline='\n',
                       delimiter=' ')
            sys.stderr.write(
                "Sampler process {0!s}: saved {1:d} mcmc samples in {2!s}\n".
                format(os.getpid(), len(self.samples),
                       'mcmc_chain_%s.dat' % os.getpid()))
        sys.stderr.write(
            "Sampler process {0!s} - mean acceptance {1:.3f}: exiting\n".
            format(os.getpid(),
                   float(self.mcmc_accepted) / float(self.mcmc_counter)))
        return 0
Example #17
    def test_subdtype(self):
        z = np.array([
            ('A', 1), ('B', 2)
        ], dtype=[('A', '|S3'), ('B', float, (1,))])
        zz = np.array([
            ('a', [10.], 100.), ('b', [20.], 200.), ('c', [30.], 300.)
        ], dtype=[('A', '|S3'), ('B', float, (1,)), ('C', float)])

        res = stack_arrays((z, zz))
        expected = ma.array(
            data=[
                (b'A', [1.0], 0),
                (b'B', [2.0], 0),
                (b'a', [10.0], 100.0),
                (b'b', [20.0], 200.0),
                (b'c', [30.0], 300.0)],
            mask=[
                (False, [False],  True),
                (False, [False],  True),
                (False, [False], False),
                (False, [False], False),
                (False, [False], False)
            ],
            dtype=zz.dtype
        )
        assert_equal(res.dtype, expected.dtype)
        assert_equal(res, expected)
        assert_equal(res.mask, expected.mask)
Example #18
def computeDataPointCounts():
	dataSet = getDataSet('20150129', '20150331', '../../Data/Autopassdata/Singledatefiles/Dataset/raw/', 'dataset')
	dataPointCounts = np.zeros((288,62))
	firstDate = dataSet['dateAndTime'][1]
	firstDateStr = firstDate.strftime('%Y%m%d')
	date_list = [firstDate.date() + timedelta(days=x) for x in range(0, 62)]
	interval_list = [(datetime(2015, 1, 1, 0, 0, 0) + timedelta(minutes=x)).time() for x in range(0, 1440, 5)]
	interval_list.append(datetime(2015, 1, 1, 23, 59, 59).time())
	for i in range(0, len(date_list)):
		endDate = date_list[i]
		print(endDate)
		endDateStr = endDate.strftime('%Y%m%d')
		dataDateSubSet = []
		if i == 0:
			dataDateSubSet = getRowsWithinDateRange(firstDateStr, endDateStr, dataSet)
		else:
			dataDateSubSet = getRowsWithinDateRange(endDateStr, endDateStr, dataSet)
		for j in range(0, len(interval_list)-1):
			i1 = interval_list[j]
			i2 = interval_list[j+1]
			dataDateIntervalSubSet = getRowsWithinTimeIntervalRange(i1, i2, dataDateSubSet)
			dataPointCounts[j][i] = len(dataDateIntervalSubSet)
		print(dataPointCounts[:, i])
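	# NOTE: stack_arrays returns plain ndarray inputs unchanged, so the call below is effectively a no-op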
	dataPointCounts = rfn.stack_arrays(dataPointCounts,usemask=False)
	np.savetxt("dataPointCountsIndividualDates.csv", dataPointCounts, fmt=";".join(["%f"] * 62))
Example #19
def recarray_from_pycbc_live(source,
                             ifo=None,
                             columns=None,
                             nproc=1,
                             **kwargs):
    """Read a `GWRecArray` from one or more PyCBC live files
    """
    source = file_list(source)
    if nproc > 1:
        from ...io.cache import read_cache
        return read_cache(source,
                          GWRecArray,
                          nproc,
                          None,
                          ifo=ifo,
                          columns=columns,
                          format='pycbc_live',
                          **kwargs)

    source = filter_empty_files(source, ifo=ifo)
    arrays = [
        recarray_from_file(x, ifo=ifo, columns=columns, **kwargs)
        for x in source
    ]
    return recfunctions.stack_arrays(arrays,
                                     asrecarray=True,
                                     usemask=False,
                                     autoconvert=True).view(GWRecArray)
Example #20
    def from_rows(cls, sample_id, row_data, extra_keys=()):
        dtype = list(cls._dtype)
        if extra_keys:
            blank_kwargs = {k: [] for k in extra_keys}
            new_cna = cls(sample_id, [], [], [], [], [], **blank_kwargs)
            if 'gc' in extra_keys:
                dtype.append(cls._dtype_gc)
            if 'rmask' in extra_keys:
                dtype.append(cls._dtype_rmask)
            if 'spread' in extra_keys:
                dtype.append(cls._dtype_spread)
            if 'weight' in extra_keys:
                dtype.append(cls._dtype_weight)
            if 'probes' in extra_keys:
                dtype.append(cls._dtype_probes)
        else:
            new_cna = cls(sample_id, [], [], [], [], [])

        if len(row_data) == 1:
            row_data = [tuple(row_data[0])]
        try:
            # Rows might be plain tuples
            new_array = numpy.asarray(row_data, dtype=dtype)
        except ValueError:
            # "Setting void-array with object members using buffer"
            # All rows are numpy.ndarray
            new_array = rfn.stack_arrays(row_data, usemask=False,
                                         asrecarray=True, autoconvert=False)
            # print(new_array.dtype)

        new_cna.data = new_array
        return new_cna
Example #21
 def __init__(self,
              input_files,
              mask_files=(),
              sensor_id=None,
              logger=None,
              selection='amp',
              linearity_correction=None):
     """
     Extract record array from the record arrays for all of the
     input_files.
     """
     self.sensor_id = sensor_id
     self.set_selection_function(selection)
     rec_arrays = []
     for infile in input_files:
         if logger is not None:
             logger.info("Processing %s" % infile)
         ccd = MaskedCCD(infile,
                         mask_files=mask_files,
                         linearity_correction=linearity_correction)
         for amp in ccd:
             rec_arrays.append(get_fp_pixels(ccd, amp))
     self.rec_array = nlr.stack_arrays(rec_arrays,
                                       usemask=False,
                                       autoconvert=True,
                                       asrecarray=True)
     self.amps = sorted(ccd.keys())
Example #22
    def __getattr__(self, attr):
        #print('getting', self.__class__.__name__, attr)
        if attr.startswith('__'):
            # we get asked for __setstate__ by copy.copy before we're
            # fully initialized. Just say no to all special names.
            raise AttributeError(attr)

        if not attr.startswith('_') and attr in getattr(
                self, '_array_attributes', {}):
            arr = [getattr(wave, attr) for wave in self.waves]
            if not arr:
                return np.empty(0)
            if isinstance(arr[0], vartype):
                return vartype.array(arr)
            if isinstance(arr[0], np.recarray):
                return recfunctions.stack_arrays(arr,
                                                 asrecarray=True,
                                                 usemask=False)
            if isinstance(arr[0], np.ndarray):
                return np.hstack(arr)
            return np.array(arr)

        if attr.startswith('mean_') and attr[5:] in getattr(
                self, '_mean_attributes', {}):
            values = self.__getattr__(attr[5:])
            return vartype.average(values)

        raise AttributeError('{} object does not have {} attribute'.format(
            self.__class__.__name__, attr))
Example #23
def root2panda(file_paths, tree_name, **kwargs):
    '''
    Args:
    -----
        file_paths: a string like './data/*.root', or a list of such strings
        tree_name: a string like 'Collection_Tree' corresponding to the name of the folder inside the root
                   file that we want to open
        kwargs: arguments taken by root2rec, such as branches to consider, etc
    Returns:
    --------
        output_panda: a pandas DataFrame like allbkg_df in which all the info from the ROOT file will be stored

    Note:
    -----
        if you are working with .root files that contain different branches, you might have to mask your data
        in that case, return pd.DataFrame(ss.data)
    '''
    if isinstance(file_paths, str):  # 'basestring' in the original Python 2 code
        files = glob.glob(file_paths)
    else:
        files = [matched_f for f in file_paths for matched_f in glob.glob(f)]

    ss = stack_arrays([root2rec(fpath, tree_name, **kwargs) for fpath in files])
    try:
        return pd.DataFrame(ss)
    except Exception:
        return pd.DataFrame(ss.data)
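
Unlike Example #1, this variant also accepts a list of glob patterns; a hypothetical call (paths illustrative):

df = root2panda(['./data/sig_*.root', './data/bkg_*.root'], 'Collection_Tree')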
Example #24
def resampleMTdataAtFreq(MTdata,freqs):
    """
    Function to resample MTdata at a set of frequencies

    """
    from SimPEG import MT
    # Make a rec array
    MTrec = MTdata.toRecArray().data

    # Find unique locations
    uniLoc = np.unique(MTrec[['x','y','z']])
    uniFreq = MTdata.survey.freqs
    # Get the comps
    dNames = MTrec.dtype

    # Loop over all the locations and interpolate
    for loc in uniLoc:
        # Find the index of the station
        ind = np.sqrt(np.sum((rec2ndarr(MTrec[['x','y','z']]) - rec2ndarr(loc))**2,axis=1)) < 1. # Find dist of 1 m accuracy
        # Make a temporary recArray and interpolate all the components
        tArrRec = np.concatenate((simpeg.mkvc(freqs,2),np.ones((len(freqs),1))*rec2ndarr(loc),np.nan*np.ones((len(freqs),12))),axis=1).view(dNames)
        for comp in ['zxxr','zxxi','zxyr','zxyi','zyxr','zyxi','zyyr','zyyi','tzxr','tzxi','tzyr','tzyi']:
            int1d = sciint.interp1d(MTrec[ind]['freq'],MTrec[ind][comp],bounds_error=False)
            tArrRec[comp] = simpeg.mkvc(int1d(freqs),2)

        # Join together
        try:
            outRecArr = recFunc.stack_arrays((outRecArr,tArrRec))
        except NameError as e:
            outRecArr = tArrRec

    # Make the MTdata and return
    return MT.Data.fromRecArray(outRecArr)
Example #25
def resampleNSEMdataAtFreq(NSEMdata, freqs):
    """
    Function to resample NSEMdata at a set of frequencies

    """

    # Make a rec array
    NSEMrec = NSEMdata.toRecArray().data

    # Find unique locations
    uniLoc = np.unique(NSEMrec[['x','y','z']])
    uniFreq = NSEMdata.survey.freqs
    # Get the comps
    dNames = NSEMrec.dtype

    # Loop over all the locations and interpolate
    for loc in uniLoc:
        # Find the index of the station
        ind = np.sqrt(np.sum((rec_to_ndarr(NSEMrec[['x','y','z']]) - rec_to_ndarr(loc))**2,axis=1)) < 1. # Find dist of 1 m accuracy
        # Make a temporary recArray and interpolate all the components
        tArrRec = np.concatenate((simpeg.mkvc(freqs,2),np.ones((len(freqs),1))*rec_to_ndarr(loc),np.nan*np.ones((len(freqs),12))),axis=1).view(dNames)
        for comp in ['zxxr','zxxi','zxyr','zxyi','zyxr','zyxi','zyyr','zyyi','tzxr','tzxi','tzyr','tzyi']:
            int1d = sciint.interp1d(NSEMrec[ind]['freq'],NSEMrec[ind][comp],bounds_error=False)
            tArrRec[comp] = simpeg.mkvc(int1d(freqs),2)

        # Join together
        try:
            outRecArr = recFunc.stack_arrays((outRecArr,tArrRec))
        except NameError:
            outRecArr = tArrRec

    # Make the NSEMdata and return
    return Data.fromRecArray(outRecArr)
Example #26
    def test_subdtype(self):
        z = np.array([
            ('A', 1), ('B', 2)
        ], dtype=[('A', '|S3'), ('B', float, (1,))])
        zz = np.array([
            ('a', [10.], 100.), ('b', [20.], 200.), ('c', [30.], 300.)
        ], dtype=[('A', '|S3'), ('B', float, (1,)), ('C', float)])

        res = stack_arrays((z, zz))
        expected = ma.array(
            data=[
                (b'A', [1.0], 0),
                (b'B', [2.0], 0),
                (b'a', [10.0], 100.0),
                (b'b', [20.0], 200.0),
                (b'c', [30.0], 300.0)],
            mask=[
                (False, [False],  True),
                (False, [False],  True),
                (False, [False], False),
                (False, [False], False),
                (False, [False], False)
            ],
            dtype=zz.dtype
        )
        assert_equal(res.dtype, expected.dtype)
        assert_equal(res, expected)
        assert_equal(res.mask, expected.mask)
Example #27
    def test_subdtype(self):
        z = np.array([("A", 1), ("B", 2)],
                     dtype=[("A", "|S3"), ("B", float, (1, ))])
        zz = np.array(
            [("a", [10.0], 100.0), ("b", [20.0], 200.0), ("c", [30.0], 300.0)],
            dtype=[("A", "|S3"), ("B", float, (1, )), ("C", float)],
        )

        res = stack_arrays((z, zz))
        expected = ma.array(
            data=[
                (b"A", [1.0], 0),
                (b"B", [2.0], 0),
                (b"a", [10.0], 100.0),
                (b"b", [20.0], 200.0),
                (b"c", [30.0], 300.0),
            ],
            mask=[
                (False, [False], True),
                (False, [False], True),
                (False, [False], False),
                (False, [False], False),
                (False, [False], False),
            ],
            dtype=zz.dtype,
        )
        assert_equal(res.dtype, expected.dtype)
        assert_equal(res, expected)
        assert_equal(res.mask, expected.mask)
Example #28
 def test_autoconversion(self):
     # Tests autoconversion
     adtype = [("A", int), ("B", bool), ("C", float)]
     a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype)
     bdtype = [("A", int), ("B", float), ("C", float)]
     b = ma.array([(4, 5, 6)], dtype=bdtype)
     control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)], dtype=bdtype)
     test = stack_arrays((a, b), autoconvert=True)
     assert_equal(test, control)
     assert_equal(test.mask, control.mask)
     try:
         test = stack_arrays((a, b), autoconvert=False)
     except TypeError:
         pass
     else:
         raise AssertionError
Example #29
def veto_all(auxiliary, segmentlist):
    """Remove events from all auxiliary channel tables based on a segmentlist

    Parameters
    ----------
    auxiliary : `dict` of `numpy.recarray`
        a `dict` of event arrays to veto
    segmentlist : `~glue.segments.segmentlist`
        the list of veto segments to use

    Returns
    -------
    survivors : `dict` of `numpy.recarray`
        a dict of the reduced arrays of events for each input channel

    See Also
    --------
    core.veto
        for details on the veto algorithm itself
    """
    channels = auxiliary.keys()
    rec = stack_arrays(auxiliary.values(), usemask=False,
                       asrecarray=True, autoconvert=True)
    keep, _ = veto(rec, segmentlist)
    return dict((c, keep[keep['channel'] == c]) for c in channels)
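
A numpy-only sketch of the stack-then-split-by-channel pattern used in veto_all (channel names and dtypes are illustrative):

import numpy as np
from numpy.lib.recfunctions import stack_arrays

aux = {
    'L1:CH1': np.array([(0.1, 'L1:CH1')], dtype=[('time', 'f8'), ('channel', 'U16')]),
    'L1:CH2': np.array([(0.2, 'L1:CH2')], dtype=[('time', 'f8'), ('channel', 'U16')]),
}
rec = stack_arrays(list(aux.values()), usemask=False, asrecarray=True)
by_channel = {c: rec[rec['channel'] == c] for c in aux}
print({c: len(v) for c, v in by_channel.items()})  # {'L1:CH1': 1, 'L1:CH2': 1}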
Example #30
def main(iso_filename, XCov_filename, interpolate=True, overwrite=False):

    # FOR PARSEC ISOCHRONE (reversing it for interpolation)
    iso = ascii.read(iso_filename, header_start=13)[:114][::-1]
    iso = nprf.stack_arrays((iso[:25], iso[27:]),usemask=False) # because of stupid red clump turnaround

    # FOR DARTMOUTH ISOCHRONE (reversing it for interpolation)
    # iso = ascii.read(iso_filename, header_start=8)[::-1]

    # output hdf5 file
    with h5py.File(XCov_filename, mode='r+') as f:

        # feature and covariance matrices for all stars
        X = ps1_isoc_to_XCov(iso, W=mixing_matrix, interpolate=interpolate)

        if 'isochrone' in f and overwrite:
            f.__delitem__('isochrone')
            logger.debug("Overwriting isochrone data")

        if 'isochrone' not in f:
            g = f.create_group('isochrone')
        else:
            g = f['isochrone']

        if 'X' not in f['isochrone']:
            g.create_dataset('X', X.shape, dtype='f', data=X)

        f.flush()
        logger.debug("Saved isochrone to {}".format(XCov_filename))
Example #31
def root2pandas(files_path, tree_name, **kwargs):
    '''
    Args:
    -----
        files_path: a string like './data/*.root', for example
        tree_name: a string like 'Collection_Tree' corresponding to the name of the folder inside the root
                   file that we want to open
        kwargs: arguments taken by root2array, such as branches to consider, start, stop, step, etc
    Returns:
    --------
        output_panda: a pandas dataframe like allbkg_df in which all the info from the root file will be stored

    Note:
    -----
        if you are working with .root files that contain different branches, you might have to mask your data
        in that case, return pd.DataFrame(ss.data)
    '''
    # -- create list of .root files to process
    files = glob.glob(files_path)

    # -- process ntuples into rec arrays
    ss = stack_arrays([root2array(fpath, tree_name, **kwargs).view(np.recarray) for fpath in files])
    try:
        return pd.DataFrame(ss)
    except Exception:
        return pd.DataFrame(ss.data)
Example #32
    def test_matching_named_fields(self):
        # Test combination of arrays w/ matching field names
        (_, x, _, z) = self.data
        zz = np.array(
            [("a", 10.0, 100.0), ("b", 20.0, 200.0), ("c", 30.0, 300.0)],
            dtype=[("A", "|S3"), ("B", float), ("C", float)],
        )
        test = stack_arrays((z, zz))
        control = ma.array(
            [
                ("A", 1, -1),
                ("B", 2, -1),
                ("a", 10.0, 100.0),
                ("b", 20.0, 200.0),
                ("c", 30.0, 300.0),
            ],
            dtype=[("A", "|S3"), ("B", float), ("C", float)],
            mask=[(0, 0, 1), (0, 0, 1), (0, 0, 0), (0, 0, 0), (0, 0, 0)],
        )
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)

        test = stack_arrays((z, zz, x))
        ndtype = [("A", "|S3"), ("B", float), ("C", float), ("f3", int)]
        control = ma.array(
            [
                ("A", 1, -1, -1),
                ("B", 2, -1, -1),
                ("a", 10.0, 100.0, -1),
                ("b", 20.0, 200.0, -1),
                ("c", 30.0, 300.0, -1),
                (-1, -1, -1, 1),
                (-1, -1, -1, 2),
            ],
            dtype=ndtype,
            mask=[
                (0, 0, 1, 1),
                (0, 0, 1, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (0, 0, 0, 1),
                (1, 1, 1, 0),
                (1, 1, 1, 0),
            ],
        )
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)
Example #33
 def reduce_nearest_neighbor_batches(self, K=5, crop_pos=300, end_pos=3600, out_fname = 'movie.imatsh.txt'):
     """
     REDUCE nearest neighbor results from batch files
     Assumes that MAP batches consist of two media sources per batch: idx=[0,1]
     inputs:
         K       - number of nearest neighbors to return [5]
         crop_pos - crop media at 0+crop_pos, END-crop_pos seconds [300]
          end_pos  - position (in seconds) of last query to process [3600]
          out_fname - output text file for the merged results ['movie.imatsh.txt']
      """
     self.K = K if K is not None else self.K # NN to keep
     end_qpos = end_pos * self.FR # End Qpos in frames
     crop_spos = int( crop_pos * self.FR )
     self.nnMerged = []
     nnMerged = self.nnMerged
     for k, nnResult in enumerate(self.nnRaw):
         if len(nnResult):
             d0, d1 = [], []  # guard: d0/d1 were previously undefined (or stale) when a batch lacked idx 0 or 1
             idx0 = nnResult['idx']==0
             if any(idx0):
                 s0_mx=max(nnResult['spos'][where(idx0)])
                 d0 = nnResult[idx0]
                 d0 = d0[where((d0['spos']>crop_spos) & (d0['spos']<s0_mx-crop_spos))]
             idx1 = nnResult['idx']==1
             if any(idx1):
                 s1_mx=max(nnResult['spos'][where(idx1)])
                 d1 = nnResult[idx1]
                 d1 = d1[where((d1['spos']>crop_spos) & (d1['spos']<s1_mx-crop_spos))]
             if len(d0) and len(d1):
                 bar = rfn.stack_arrays([d0,d1])
             elif len(d0):
                 bar = d0
             elif len(d1):
                 bar = d1
             else:
                 continue
             bar['dist'][where(isnan(bar['dist']))]=2
             bar['prob'][where(isnan(bar['prob']))]=0
             bar['amp'][where(isnan(bar['amp']))]=0
             bar['idx'] = bar['idx'] + 2*k
             nnMerged.append(bar)
     nnMerged = rfn.stack_arrays(nnMerged)
      nnMerged = nnMerged[where(~isnan(nnMerged['dist']))]  # '!= nan' is always True; isnan is the intended test
     nnMerged = np.sort(nnMerged,kind='mergesort',order=['qpos','dist'])
     nnMerged = [nnMerged[where(nnMerged['qpos']==k)][:K].data for k in arange(0,nnMerged['qpos'].max()+1,self.shingle_hop)]
     nnMerged = rfn.stack_arrays(nnMerged)
     nnMerged = nnMerged[where(nnMerged['qpos']<=end_qpos)]
     self.nnMerged = nnMerged
     savetxt(out_fname,nnMerged,fmt=self.fields_str)
Example #34
    def importFiles(self):
        """
        Function to import EDI files into an object.


        """

        # Constants that are needed for conversion of units

        # Temp lists
        tmpStaList = []

        tmpCompList = ['freq','x','y','z']
        tmpCompList.extend(self.comps)
        # Make the outarray
        dtRI = [(compS.lower().replace('.',''),float) for compS in tmpCompList]
        # Loop through all the files
        for nrEDI, EDIfile in enumerate(self.filesList):
            # Read the file into a list of the lines
            with open(EDIfile,'r') as fid:
                EDIlines = fid.readlines()
            # Find the location
            latD, longD, elevM = _findLatLong(EDIlines)
            # Transform coordinates
            transCoord = self._transfromPoints(longD,latD)
            # Extract the name of the file (station)
            EDIname = EDIfile.split(os.sep)[-1].split('.')[0]
            # Arrange the data
            staList = [EDIname, EDIfile, transCoord[0], transCoord[1], elevM[0]]
            # Add to the station list
            tmpStaList.extend(staList)

            # Read the frequency data
            freq = _findEDIcomp('>FREQ',EDIlines)
            # Make the temporary rec array.
            tArrRec = ( np.nan*np.ones( (len(freq),len(dtRI)) ) ).view(dtRI)     #np.concatenate((freq*np.ones((locs.shape[0],1)),locs,np.nan*np.ones((locs.shape[0],8))),axis=1).view(dtRI)
            # Add data to the array
            tArrRec['freq'] = mkvc(freq,2)
            tArrRec['x'] = mkvc(np.ones((len(freq),1))*transCoord[0],2)
            tArrRec['y'] = mkvc(np.ones((len(freq),1))*transCoord[1],2)
            tArrRec['z'] = mkvc(np.ones((len(freq),1))*elevM[0],2)
            for comp in self.comps:
                # Deal with converting units of the impedance tensor
                if 'Z' in comp:
                    unitConvert = self._impUnitEDI2SI
                else:
                    unitConvert = 1
                # Rotate the data since EDI x is *north, y *east but Simpeg uses x *east, y *north (* means internal reference frame)
                key = [comp.lower().replace('.','').replace(s,t) for s,t in [['xx','yy'],['xy','yx'],['yx','xy'],['yy','xx']] if s in comp.lower()][0]
                tArrRec[key] = mkvc(unitConvert*_findEDIcomp('>'+comp,EDIlines),2)
            # Make a masked array
            mArrRec = np.ma.MaskedArray(rec2ndarr(tArrRec),mask=np.isnan(rec2ndarr(tArrRec))).view(dtype=tArrRec.dtype)
            try:
                outTemp = recFunc.stack_arrays((outTemp,mArrRec))
            except NameError as e:
                outTemp = mArrRec

        # Assign the data
        self._data = outTemp
Example #35
    def importFiles(self):
        """
        Function to import EDI files into an object.


        """

        # Constants that are needed for conversion of units

        # Temp lists
        tmpStaList = []

        tmpCompList = ['freq','x','y','z']
        tmpCompList.extend(self.comps)
        # Make the outarray
        dtRI = [(compS.lower().replace('.',''),float) for compS in tmpCompList]
        # Loop through all the files
        for nrEDI, EDIfile in enumerate(self.filesList):
            # Read the file into a list of the lines
            with open(EDIfile,'r') as fid:
                EDIlines = fid.readlines()
            # Find the location
            latD, longD, elevM = _findLatLong(EDIlines)
            # Transform coordinates
            transCoord = self._transfromPoints(longD,latD)
            # Extract the name of the file (station)
            EDIname = EDIfile.split(os.sep)[-1].split('.')[0]
            # Arrange the data
            staList = [EDIname, EDIfile, transCoord[0], transCoord[1], elevM[0]]
            # Add to the station list
            tmpStaList.extend(staList)

            # Read the frequency data
            freq = _findEDIcomp('>FREQ',EDIlines)
            # Make the temporary rec array.
            tArrRec = ( np.nan*np.ones( (len(freq),len(dtRI)) ) ).view(dtRI)     #np.concatenate((freq*np.ones((locs.shape[0],1)),locs,np.nan*np.ones((locs.shape[0],8))),axis=1).view(dtRI)
            # Add data to the array
            tArrRec['freq'] = mkvc(freq,2)
            tArrRec['x'] = mkvc(np.ones((len(freq),1))*transCoord[0],2)
            tArrRec['y'] = mkvc(np.ones((len(freq),1))*transCoord[1],2)
            tArrRec['z'] = mkvc(np.ones((len(freq),1))*elevM[0],2)
            for comp in self.comps:
                # Deal with converting units of the impedance tensor
                if 'Z' in comp:
                    unitConvert = self._impUnitEDI2SI
                else:
                    unitConvert = 1
                # Rotate the data since EDI x is *north, y *east but Simpeg uses x *east, y *north (* means internal reference frame)
                key = [comp.lower().replace('.','').replace(s,t) for s,t in [['xx','yy'],['xy','yx'],['yx','xy'],['yy','xx']] if s in comp.lower()][0]
                tArrRec[key] = mkvc(unitConvert*_findEDIcomp('>'+comp,EDIlines),2)
            # Make a masked array
            mArrRec = np.ma.MaskedArray(rec2ndarr(tArrRec),mask=np.isnan(rec2ndarr(tArrRec))).view(dtype=tArrRec.dtype)
            try:
                outTemp = recFunc.stack_arrays((outTemp,mArrRec))
            except NameError as e:
                outTemp = mArrRec

        # Assign the data
        self._data = outTemp
Example #36
 def stack(self, r, defaults=None):
     """
     Superposes arrays field by field
     """
     self.data = recfunctions.stack_arrays([self.data, r],
                                           defaults,
                                           usemask=False,
                                           asrecarray=True)
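
The defaults argument forwarded above fills fields that are missing from some inputs instead of leaving them masked; a minimal numpy-only sketch:

import numpy as np
from numpy.lib import recfunctions

a = np.array([(1, 10.)], dtype=[('id', int), ('v', float)])
b = np.array([(2,)], dtype=[('id', int)])
out = recfunctions.stack_arrays([a, b], defaults={'v': -1.0},
                                usemask=False, asrecarray=True)
print(out['v'])  # [10. -1.]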
Example #37
def get_events_one_type(kwe_file, ev_type, ev_names=[], rec=None):
    if ev_names == []:
        ev_names = list_events(kwe_file, ev_type)
    ev_stack = [
        get_events_one_name(kwe_file, ev_type, ev_name, rec=rec)
        for ev_name in ev_names
    ]
    return rf.stack_arrays(ev_stack, asrecarray=True, usemask=False)
Example #38
 def test_autoconversion(self):
     # Tests autoconversion
     adtype = [('A', int), ('B', bool), ('C', float)]
     a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype)
     bdtype = [('A', int), ('B', float), ('C', float)]
     b = ma.array([(4, 5, 6)], dtype=bdtype)
     control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)],
                        dtype=bdtype)
     test = stack_arrays((a, b), autoconvert=True)
     assert_equal(test, control)
     assert_equal(test.mask, control.mask)
     try:
         test = stack_arrays((a, b), autoconvert=False)
     except TypeError:
         pass
     else:
         raise AssertionError
Example #39
    def _get_t_isochrones(self, logtmin, logtmax, dlogt, Z=0.0152):
        """ Generate a proper table directly from the PADOVA website

        Parameters
        ----------
        logtmin: float
            log-age min (age in yr)

        logtmax: float
            log-age max (age in yr)

        dlogt: float
            log-age step to request

        Z: float or sequence
            single value or list of values of metallicity Z

        returns
        -------
        tab: eztable.Table
            the table of isochrones
        """
        if not hasattr(Z, "__iter__"):
            iso_table = parsec.get_t_isochrones(max(6.0, logtmin),
                                                min(10.13, logtmax),
                                                dlogt,
                                                Z,
                                                model=self.modeltype)
            iso_table.header[
                "NAME"] = "PadovaCMD Isochrones: " + self.modeltype
            if "Z" not in iso_table:
                iso_table.add_column("Z", np.ones(iso_table.nrows) * Z)

            # rename cols, remove phot and other unnecessary cols
            iso_table = self._clean_cols(iso_table)

            # filter iso data: pre-ms and bad points
            iso_table = self._filter_iso_points(iso_table,
                                                filterPMS=self.filterPMS,
                                                filterBad=self.filterBad)

        else:
            iso_table = self._get_t_isochrones(logtmin, logtmax, dlogt, Z[0])
            iso_table.header[
                "NAME"] = "PadovaCMD Isochrones: " + self.modeltype

            if len(Z) > 1:
                more = [
                    self._get_t_isochrones(logtmin, logtmax, dlogt, Zk).data
                    for Zk in Z[1:]
                ]
                iso_table.data = recfunctions.stack_arrays([iso_table.data] +
                                                           more,
                                                           usemask=False,
                                                           asrecarray=True)

        return iso_table
Example #40
def read(filelist):
    data = []
    for f in sorted(filelist):
        x = np.load(f)
        if len(data) == 0:
            data = x.copy()
        else:
            data = rf.stack_arrays([data, x])
    return data
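
read() restacks the accumulated array on every file, which is quadratic in the total number of rows; a sketch of the usual alternative, collecting first and stacking once (assumes, as the original seems to, that all files share a dtype):

import numpy as np
from numpy.lib import recfunctions as rf

def read_once(filelist):
    # Load all files first, then stack a single time.
    chunks = [np.load(f) for f in sorted(filelist)]
    return rf.stack_arrays(chunks, usemask=False) if chunks else np.empty(0)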
Example #41
 def combine_healpix_files(folder):
     file_names = glob.glob1(folder, '*')
     array_list = []
     for file in file_names:
         array_list.append(np.load(folder + file))
     result = stack_arrays(array_list, usemask=False, autoconvert=True)
     print('Length of the stacked download:', len(result),
           ' And the dtype:', result.dtype)
     return (result)
Example #42
def find_and_measure_peaks(data, peak_flux_list=None, use_flux_con=True, ignore_defects=True,
                            window_size=11,sigma=5,p=0.5,percentile=10):
    global ts

    if peak_flux_list is None:
        peak_flux_list = []

    ts = mark_time()
    found_peaks, found_inds = real_find_peaks(data,window_size=window_size,p=p,sigma=sigma,percentile=percentile)
    ts = mark_time('real_find_peaks', ts)
    removed = False

    min_wavelength = np.ma.min(data['wavelength'])
    max_wavelength = np.ma.max(data['wavelength'])

    #print found_peaks
    #print found_inds
    for candidate_peak, candidate_ind in zip(found_peaks, found_inds):
        removed = False
        if candidate_peak is np.ma.masked:
            continue
        if candidate_peak > max_wavelength or candidate_peak < min_wavelength:
            continue

        for peak in peak_flux_list:
            if (candidate_peak > peak['wavelength_lower_bound'] and
                    candidate_peak < peak['wavelength_upper_bound']  and
                    np.abs(candidate_ind - peak['index_lower_bound']) >= max_peak_width and
                    np.abs(candidate_ind - peak['index_upper_bound']) >= max_peak_width):
                #found_peaks.remove(peak)
                removed=True
                break

        if not removed:  # the original 'if ~removed:' was always truthy, since ~bool is a nonzero int
            #ts = mark_time()
            target_flux_totals = get_total_flux("UNKNOWN", data['wavelength'], data['flux'],
                                None if not use_flux_con else data['con_flux'], candidate_peak,
                                ignore_defects=ignore_defects)
            #ts = mark_time('get_total_flux', ts)
            peak_flux_list.append(target_flux_totals)
    ts = mark_time('flux loop', ts)

    #Now, need to prune the list
    arr = rfn.stack_arrays(peak_flux_list)
    ts = mark_time('stack_arrays', ts)
    peak_flux = Table(data=arr)
    ts = mark_time('create table', ts)

    #save_data(peak_flux, 'pre_filter')

    peak_flux.remove_rows(np.abs(peak_flux['peak_delta']) > max_peak_width)
    peak_flux = filter_for_overlaps(peak_flux, ['index_lower_bound', 'index_upper_bound'])
    peak_flux = filter_for_overlaps(peak_flux, ['index_lower_bound'])
    peak_flux = filter_for_overlaps(peak_flux, ['index_upper_bound'])
    ts = mark_time('filter_for_overlaps', ts)

    return peak_flux
Example #43
def read_array_info(entry):
	data = try_read(files.read_array_info, "array_info", entry.array_info)
	info = recfunctions.stack_arrays([
		build_detname(data.info.det_uid, entry),
		recfunctions.drop_fields(data.info, "det_uid"),
		])
	return dataset.DataSet([
		dataset.DataField("array_info",data),
		dataset.DataField("entry", entry)])
Example #44
def get_filaments(array, id_name):
    """Calculate the size and members of each filament"""
    filaments = []
    current_id = array[id_name][0]
    current_filament = []

    for entry in array:
        if entry[id_name] != current_id:
            current_id = entry[id_name]
            filaments.append(np.atleast_1d(nlr.stack_arrays(current_filament)))
            current_filament = []
            current_filament.append(entry)
        else:
            current_filament.append(entry)

    filaments.append(np.atleast_1d(nlr.stack_arrays(current_filament)))

    # dtype=object keeps the ragged list of filaments intact; without it,
    # recent numpy versions refuse to build an array from unequal-length rows.
    return np.array(filaments, dtype=object)
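A small usage sketch with made-up field names; note that the function only groups consecutive rows, so the input should be sorted by the id column first:

import numpy as np
import numpy.lib.recfunctions as nlr  # the alias the function above expects

members = np.array(
    [(1, 0.5), (1, 0.7), (2, 0.1), (2, 0.9), (2, 0.3)],
    dtype=[('fil_id', int), ('density', float)],
)
for fil in get_filaments(members, 'fil_id'):
    # Prints "1 2" then "2 3": two filaments with 2 and 3 members each.
    print(fil['fil_id'][0], len(fil))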
Exemple #45
0
def calculate_fret(acc_locs, don_locs):
    """
    Calculate the FRET efficiency in picked regions; this is for one trace
    """
    fret_dict = {}
    if len(acc_locs) == 0:
        max_frames = _np.max(don_locs['frame'])
    elif len(don_locs) == 0:
        max_frames = _np.max(acc_locs['frame'])
    else:
        max_frames = _np.max(
            [_np.max(acc_locs['frame']),
             _np.max(don_locs['frame'])])

    #Initialize a vector filled with zeros for the duration of the movie
    xvec = _np.arange(max_frames + 1)
    yvec = _np.zeros_like(xvec)

    acc_trace = yvec.copy()
    don_trace = yvec.copy()

    #Fill vector with the photon numbers of events that happened
    acc_trace[acc_locs['frame']] = acc_locs['photons'] - acc_locs['bg']
    don_trace[don_locs['frame']] = don_locs['photons'] - don_locs['bg']

    #Calculate the FRET efficiency
    fret_trace = acc_trace / (acc_trace + don_trace)

    #Only select FRET values between 0 and 1
    selector = _np.logical_and(fret_trace > 0, fret_trace < 1)

    #select the final fret events based on the 0 to 1 range
    fret_events = fret_trace[selector]

    fret_timepoints = _np.arange(len(fret_trace))[selector]

    # Calculate FRET localizations:  Select the localizations when FRET happens
    #loc_selector = [True if _ in fret_timepoints else False for _ in don_locs['frame'] ]
    #fret_locs = don_locs[loc_selector==True]

    sel_locs = []
    for element in fret_timepoints:
        sel_locs.append(don_locs[don_locs['frame'] == element])

    fret_locs = stack_arrays(sel_locs, asrecarray=True, usemask=False)

    fret_locs = _lib.append_to_rec(fret_locs, _np.array(fret_events), 'fret')

    fret_dict['fret_events'] = _np.array(fret_events)
    fret_dict['fret_timepoints'] = fret_timepoints
    fret_dict['acc_trace'] = acc_trace
    fret_dict['don_trace'] = don_trace
    fret_dict['frames'] = xvec
    fret_dict['maxframes'] = max_frames

    return fret_dict, fret_locs
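The efficiency formula and the (0, 1) selection above are easy to sanity-check with made-up photon counts:

import numpy as np

acc = np.array([300., 0., 50.])   # hypothetical background-corrected acceptor photons
don = np.array([100., 200., 0.])  # hypothetical donor photons
fret = acc / (acc + don)          # -> [0.75, 0.0, 1.0]
keep = np.logical_and(fret > 0, fret < 1)
print(fret[keep])                 # only the 0.75 event survives the cut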
Exemple #46
0
def list_sess_units(bird, sess, sorted=False):
    shanks = et.get_shanks_list(bird, sess)
    sess_units = None
    for shank in shanks:
        # Forward the caller's flag instead of hardcoding sorted=False,
        # which silently ignored the parameter.
        shank_units = list_shank_units(bird, sess, shank, sorted=sorted)
        if sess_units is None:
            sess_units = shank_units
        else:
            sess_units = rfn.stack_arrays((sess_units, shank_units))
    return sess_units
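Since stack_arrays accepts a whole sequence, the pairwise loop can also be collapsed into one call. A sketch under the same assumptions (non-empty shank list, compatible structured arrays from list_shank_units):

def list_sess_units_oneshot(bird, sess, sorted=False):
    shanks = et.get_shanks_list(bird, sess)
    # One stack over all shanks instead of repeated pairwise stacking.
    return rfn.stack_arrays(
        [list_shank_units(bird, sess, shank, sorted=sorted) for shank in shanks])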
Exemple #47
0
def root2pandas(files, tree_name, **kwargs):
    # -- process ntuples into rec arrays
    ss = stack_arrays([
        root2array(fpath, tree_name, **kwargs).view(numpy.recarray)
        for fpath in files
    ])
    try:
        return pandas.DataFrame(ss)
    except Exception:
        return pandas.DataFrame(ss.data)
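Hypothetical usage; the glob pattern, tree name, and branch list are placeholders (branches is forwarded to root2array through **kwargs):

import glob
df = root2pandas(glob.glob('./ntuples/*.root'), 'events', branches=['pt', 'eta'])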
Exemple #48
0
    def test_checktitles(self):
        # Test using titles in the field names
        adtype = [(("a", "A"), int), (("b", "B"), bool), (("c", "C"), float)]
        a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype)
        bdtype = [(("a", "A"), int), (("b", "B"), bool), (("c", "C"), float)]
        b = ma.array([(4, 5, 6)], dtype=bdtype)
        test = stack_arrays((a, b))
        control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)], dtype=bdtype)
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)
    def summarize_chamber_data(self,analyzed_chamber_data):
        summarized_total_data = self.summarize_data(analyzed_chamber_data)
        status_array = numpy.array(['Total']*len(summarized_total_data),dtype='|S25')
        summarized_chamber_data = recfunctions.append_fields(summarized_total_data,
                                                             'status',
                                                             status_array,
                                                             dtypes='|S25',
                                                             usemask=False)

        air_before_data = analyzed_chamber_data[analyzed_chamber_data['status']=='AirBefore']
        if air_before_data.size != 0:
            summarized_air_before_data = self.summarize_data(air_before_data)
            status_array = numpy.array(['AirBefore']*len(summarized_air_before_data),dtype='|S25')
            summarized_air_before_data = recfunctions.append_fields(summarized_air_before_data,
                                                                    'status',
                                                                    status_array,
                                                                    dtypes='|S25',
                                                                    usemask=False)
            summarized_chamber_data = recfunctions.stack_arrays((summarized_chamber_data,summarized_air_before_data),usemask=False)

        ethanol_data = analyzed_chamber_data[analyzed_chamber_data['status']=='Ethanol']
        summarized_ethanol_data = self.summarize_data(ethanol_data)
        status_array = numpy.array(['Ethanol']*len(summarized_ethanol_data),dtype='|S25')
        summarized_ethanol_data = recfunctions.append_fields(summarized_ethanol_data,
                                                             'status',
                                                             status_array,
                                                             dtypes='|S25',
                                                             usemask=False)
        summarized_chamber_data = recfunctions.stack_arrays((summarized_chamber_data,summarized_ethanol_data),usemask=False)

        air_after_data = analyzed_chamber_data[analyzed_chamber_data['status']=='AirAfter']
        if air_after_data.size != 0:
            summarized_air_after_data = self.summarize_data(air_after_data)
            status_array = numpy.array(['AirAfter']*len(summarized_air_after_data),dtype='|S25')
            summarized_air_after_data = recfunctions.append_fields(summarized_air_after_data,
                                                                   'status',
                                                                   status_array,
                                                                   dtypes='|S25',
                                                                   usemask=False)
            summarized_chamber_data = recfunctions.stack_arrays((summarized_chamber_data,summarized_air_after_data),usemask=False)

        return summarized_chamber_data
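The four nearly identical tag-and-stack blocks invite a loop. A sketch of the same logic (names as above; the only behavioral difference is that an empty Ethanol subset is skipped too):

    def summarize_chamber_data(self, analyzed_chamber_data):
        def tag(summary, label):
            # Append a constant 'status' column identifying the phase.
            return recfunctions.append_fields(
                summary, 'status',
                numpy.array([label]*len(summary), dtype='|S25'),
                dtypes='|S25', usemask=False)

        summarized = tag(self.summarize_data(analyzed_chamber_data), 'Total')
        for label in ('AirBefore', 'Ethanol', 'AirAfter'):
            subset = analyzed_chamber_data[analyzed_chamber_data['status'] == label]
            if subset.size == 0:
                continue
            summarized = recfunctions.stack_arrays(
                (summarized, tag(self.summarize_data(subset), label)), usemask=False)
        return summarized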
Exemple #51
0
    def toRecArray(self,returnType='RealImag'):
        '''
        Function that returns a numpy.recarray for a SimpegMT impedance data object.

        :param str returnType: Switches between returning a rec array where the impedance is split to real and imaginary ('RealImag') or is a complex ('Complex')

        '''

        # Define the record fields
        dtRI = [('freq', float), ('x', float), ('y', float), ('z', float),
                ('zxxr', float), ('zxxi', float), ('zxyr', float), ('zxyi', float),
                ('zyxr', float), ('zyxi', float), ('zyyr', float), ('zyyi', float),
                ('tzxr', float), ('tzxi', float), ('tzyr', float), ('tzyi', float)]
        dtCP = [('freq', float), ('x', float), ('y', float), ('z', float),
                ('zxx', complex), ('zxy', complex), ('zyx', complex), ('zyy', complex),
                ('tzx', complex), ('tzy', complex)]
        impList = ['zxxr', 'zxxi', 'zxyr', 'zxyi', 'zyxr', 'zyxi', 'zyyr', 'zyyi']
        for src in self.survey.srcList:
            # Temp array for all the receivers of the source.
            # Note: needs to be written more generally, using different rxTypes and not all the data at the locations
            # Assume the same locs for all RX
            locs = src.rxList[0].locs
            if locs.shape[1] == 1:
                locs = np.hstack((np.array([[0.0,0.0]]),locs))
            elif locs.shape[1] == 2:
                locs = np.hstack((np.array([[0.0]]),locs))
            tArrRec = np.concatenate((src.freq*np.ones((locs.shape[0],1)),locs,np.nan*np.ones((locs.shape[0],12))),axis=1).view(dtRI)
            # Get the type and the value for the DataMT object as a list
            typeList = [[rx.rxType.replace('z1d','zyx'),self[src,rx]] for rx in src.rxList]
            # Insert the values to the temp array
            for nr,(key,val) in enumerate(typeList):
                tArrRec[key] = mkvc(val,2)
            # Masked array
            mArrRec = np.ma.MaskedArray(rec2ndarr(tArrRec),mask=np.isnan(rec2ndarr(tArrRec))).view(dtype=tArrRec.dtype)
            # Unique freq and loc of the masked array
            uniFLmarr = np.unique(mArrRec[['freq','x','y','z']]).copy()

            try:
                # On the first source outTemp does not exist yet; the
                # NameError branch below seeds the accumulator.
                outTemp = recFunc.stack_arrays((outTemp, mArrRec))
            except NameError:
                outTemp = mArrRec

            if 'RealImag' in returnType:
                outArr = outTemp
            elif 'Complex' in returnType:
                # Add the real and imaginary to a complex number
                outArr = np.empty(outTemp.shape,dtype=dtCP)
                for comp in ['freq','x','y','z']:
                    outArr[comp] = outTemp[comp].copy()
                for comp in ['zxx','zxy','zyx','zyy','tzx','tzy']:
                    outArr[comp] = outTemp[comp+'r'].copy() + 1j*outTemp[comp+'i'].copy()
            else:
                raise NotImplementedError('{:s} is not implemented; returnType has to be RealImag or Complex.'.format(returnType))

        # Return
        return outArr
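stack_arrays keeps masks intact (its default is usemask=True), which is why the NaN-masked per-source blocks survive the accumulation above. A tiny self-contained demonstration:

import numpy as np
from numpy.lib import recfunctions as recFunc

dt = [('freq', float), ('zxxr', float)]
a = np.ma.array([(1.0, np.nan)], mask=[(0, 1)], dtype=dt)
b = np.ma.array([(2.0, 0.5)], mask=[(0, 0)], dtype=dt)
stacked = recFunc.stack_arrays((a, b))
print(stacked.mask)   # the entry masked in the first block stays masked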
Exemple #52
0
def combine_data(data_all, data_new, field_dict):
    """
    combine_data appends one constant-valued column per entry of field_dict
    to data_new, then stacks data_new onto the end of data_all
    
    Inputs:     data_all, data_new, field_dict
    
    Output:     combined data array, sorted by loc_id
    
    """

    #==========================================================================
    import numpy as np
    import numpy.lib.recfunctions as rfn
    #==========================================================================

    print('  Combining data')

    for key, value in field_dict.items():
        # Define a new column that is the same value for everyone in the
        # new data array

        col = np.array(value)
        col = np.repeat(col, len(data_new))

        # Add this column to the data_new recarray
        if key == 'b0_order':
            data_new = rfn.append_fields(data_new,
                                         key,
                                         col,
                                         usemask=False,
                                         asrecarray=True,
                                         dtypes='S100')
        else:
            data_new = rfn.append_fields(data_new,
                                         key,
                                         col,
                                         usemask=False,
                                         asrecarray=True)

    # If data_all exists, then join data_new to the end of it
    if data_all is not None:

        data_all = rfn.stack_arrays((data_all, data_new),
                                    usemask=False,
                                    asrecarray=True)

    # If data_all doesn't yet exist then data_new becomes data_all
    else:
        data_all = data_new

    data_all.sort(order='loc_id')

    return data_all
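A hypothetical call, tagging a new block with two per-file constants before merging (field names are placeholders):

import numpy as np

block = np.array([(2, 0.4), (1, 0.2)],
                 dtype=[('loc_id', int), ('fa', float)]).view(np.recarray)
# b'AP' is passed as bytes to match the S100 dtype used for b0_order.
merged = combine_data(None, block, {'sub_id': 7, 'b0_order': b'AP'})
print(merged.dtype.names)   # ('loc_id', 'fa', 'sub_id', 'b0_order'), sorted by loc_id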
Exemple #53
0
def saveDataSet(directory, filename, dataSet, format, header):
	nRows = dataSet.shape[0]
	firstDate = dataSet['dateAndTime'][0]
	lastDate = dataSet['dateAndTime'][nRows-1]
	nDays = (lastDate - firstDate).days
	dates = [firstDate + timedelta(x) for x in range(0, nDays+1)]
	for date in dates:
		dateStr = date.strftime('%Y%m%d')
		filenameStr = dateStr + filename
		rowsOnDate = [dataSet[i] for i in range(0, nRows) if dataSet['dateAndTime'][i].date() == date.date()]
		rowsOnDate = rfn.stack_arrays(rowsOnDate, usemask=False)
		np.savetxt(join(directory, filenameStr), rowsOnDate, fmt=format, header=header, comments='')
Exemple #55
0
def recarray_from_pycbc_live(source, ifo=None, columns=None, nproc=1, **kwargs):
    """Read a `GWRecArray` from one or more PyCBC live files
    """
    source = file_list(source)
    if nproc > 1:
        from ...io.cache import read_cache

        return read_cache(source, GWRecArray, nproc, None, ifo=ifo, columns=columns, format="pycbc_live", **kwargs)

    source = filter_empty_files(source, ifo=ifo)
    arrays = [recarray_from_file(x, ifo=ifo, columns=columns, **kwargs) for x in source]
    return recfunctions.stack_arrays(arrays, asrecarray=True, usemask=False, autoconvert=True).view(GWRecArray)