def root2panda(files_path, tree_name, mask=False, **kwargs):
    '''
    Args:
    -----
        files_path: a string like './data/*.root', for example
        tree_name: a string like 'Collection_Tree' corresponding to the name of the folder
                   inside the root file that we want to open
        kwargs: arguments taken by root2rec, such as branches to consider, etc
    Returns:
    --------
        output_panda: a pandas DataFrame like allbkg_df in which all the info from the root file will be stored
    Note:
    -----
        if you are working with .root files that contain different branches, you might have to mask your data;
        in that case, return pd.DataFrame(ss.data)
    '''
    files = glob.glob(files_path)

    # -- check whether a name was passed for the tree_name --> for root files with
    # -- only one tree and no folders, you do not need to specify any name (I believe)
    if tree_name == '':
        ss = stack_arrays([root2rec(fpath, **kwargs) for fpath in files])
    else:
        ss = stack_arrays([root2rec(fpath, tree_name, **kwargs) for fpath in files])

    if mask:
        return pd.DataFrame(ss.data)
    else:
        try:
            return pd.DataFrame(ss)
        except Exception:  # was `except Exception, e:` (Python 2 syntax)
            return pd.DataFrame(ss.data)
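# A minimal, self-contained sketch (toy dtypes, not the ROOT-reading path) of why
# the `.data` fallback above exists: stack_arrays returns a *masked* array when the
# stacked inputs do not share all branches, and `.data` recovers a plain structured
# ndarray that pandas accepts.
import numpy as np
import pandas as pd
from numpy.lib.recfunctions import stack_arrays

a = np.array([(1, 2.0)], dtype=[('pt', int), ('eta', float)])
b = np.array([(3,)], dtype=[('pt', int)])  # this input is missing the 'eta' branch

ss = stack_arrays([a, b])   # masked array: 'eta' is masked for the row from b
df = pd.DataFrame(ss.data)  # plain structured ndarray with fill values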
def combine_datasets(dataset_list):
    """
    Definition:
    -----------
        Function that combines a list of datasets into a single dataset
        Each of the inputs (and the output) should have the form {"X":data, "y":recarray, "w":recarray}
        This allows us to combine datasets from different input files

    Args:
    -----
        dataset_list = list of dictionaries of the form {"X":data, "y":recarray, "w":recarray}

    Returns:
    --------
        dictionary of the form {"X":data, "y":recarray, "w":recarray} containing all input information
    """
    # -- y and w are 1D arrays, which are simple to combine
    y_combined = stack_arrays([dataset["y"] for dataset in dataset_list], asrecarray=True, usemask=False)
    w_combined = stack_arrays([dataset["w"] for dataset in dataset_list], asrecarray=True, usemask=False)

    # -- Construct the desired output shape using the known size of y_combined
    #    Necessary shape is (N_elements, N_categories)
    X_shape = (y_combined.shape[0], dataset_list[0]["X"].shape[1])

    # -- Stack X arrays (stack_arrays ravels the 2D inputs) and then reshape
    X_combined = stack_arrays([dataset["X"] for dataset in dataset_list], asrecarray=True, usemask=False)
    X_combined.resize(X_shape)

    # -- Recombine into a dictionary and return
    return {"X": X_combined, "y": y_combined, "w": w_combined}
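# Because stack_arrays ravels plain (non-structured) 2-D inputs, the "X" part above
# needs the resize step. A hedged alternative sketch (toy shapes and field names,
# not from the original source): combine "X" with ordinary concatenation, which
# keeps the 2-D shape directly.
import numpy as np
from numpy.lib.recfunctions import stack_arrays

d1 = {"X": np.ones((3, 2)), "y": np.zeros(3, dtype=[("y", float)]), "w": np.ones(3, dtype=[("w", float)])}
d2 = {"X": np.zeros((2, 2)), "y": np.ones(2, dtype=[("y", float)]), "w": np.ones(2, dtype=[("w", float)])}

y = stack_arrays([d["y"] for d in (d1, d2)], asrecarray=True, usemask=False)
w = stack_arrays([d["w"] for d in (d1, d2)], asrecarray=True, usemask=False)
X = np.concatenate([d["X"] for d in (d1, d2)], axis=0)  # shape (5, 2), no resize needed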
def test_matching_named_fields(self):
    # Test combination of arrays w/ matching field names
    (_, x, _, z) = self.data
    zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)],
                  dtype=[('A', '|S3'), ('B', float), ('C', float)])

    test = stack_arrays((z, zz))
    control = ma.array([('A', 1, -1), ('B', 2, -1),
                        ('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)],
                       dtype=[('A', '|S3'), ('B', float), ('C', float)],
                       mask=[(0, 0, 1), (0, 0, 1),
                             (0, 0, 0), (0, 0, 0), (0, 0, 0)])
    assert_equal(test, control)
    assert_equal(test.mask, control.mask)

    test = stack_arrays((z, zz, x))
    ndtype = [('A', '|S3'), ('B', float), ('C', float), ('f3', int)]
    control = ma.array([('A', 1, -1, -1), ('B', 2, -1, -1),
                        ('a', 10., 100., -1), ('b', 20., 200., -1),
                        ('c', 30., 300., -1), (-1, -1, -1, 1),
                        (-1, -1, -1, 2)],
                       dtype=ndtype,
                       mask=[(0, 0, 1, 1), (0, 0, 1, 1),
                             (0, 0, 0, 1), (0, 0, 0, 1),
                             (0, 0, 0, 1), (1, 1, 1, 0),
                             (1, 1, 1, 0)])
    assert_equal(test, control)
    assert_equal(test.mask, control.mask)
def test_unnamed_and_named_fields(self):
    # Test combination of arrays w/ & w/o named fields
    (_, x, _, z) = self.data

    test = stack_arrays((x, z))
    control = ma.array([(1, -1, -1), (2, -1, -1),
                        (-1, 'A', 1), (-1, 'B', 2)],
                       mask=[(0, 1, 1), (0, 1, 1),
                             (1, 0, 0), (1, 0, 0)],
                       dtype=[('f0', int), ('A', '|S3'), ('B', float)])
    assert_equal(test, control)
    assert_equal(test.mask, control.mask)

    test = stack_arrays((z, x))
    control = ma.array([('A', 1, -1), ('B', 2, -1),
                        (-1, -1, 1), (-1, -1, 2)],
                       mask=[(0, 0, 1), (0, 0, 1),
                             (1, 1, 0), (1, 1, 0)],
                       dtype=[('A', '|S3'), ('B', float), ('f2', int)])
    assert_equal(test, control)
    assert_equal(test.mask, control.mask)

    test = stack_arrays((z, z, x))
    control = ma.array([('A', 1, -1), ('B', 2, -1),
                        ('A', 1, -1), ('B', 2, -1),
                        (-1, -1, 1), (-1, -1, 2)],
                       mask=[(0, 0, 1), (0, 0, 1),
                             (0, 0, 1), (0, 0, 1),
                             (1, 1, 0), (1, 1, 0)],
                       dtype=[('A', '|S3'), ('B', float), ('f2', int)])
    assert_equal(test, control)
def analyze_chamber_data(self, raw_chamber_data):
    ethanol_data = raw_chamber_data[raw_chamber_data['status'] == 'Ethanol']
    analyzed_ethanol_data = self.analyze_data(ethanol_data)
    status_array = numpy.array(['Ethanol']*len(analyzed_ethanol_data), dtype='|S25')
    analyzed_chamber_data = recfunctions.append_fields(analyzed_ethanol_data,
                                                       'status',
                                                       status_array,
                                                       dtypes='|S25',
                                                       usemask=False)

    air_before_data = raw_chamber_data[raw_chamber_data['status'] == 'AirBefore']
    if air_before_data.size != 0:
        analyzed_air_before_data = self.analyze_data(air_before_data)
        status_array = numpy.array(['AirBefore']*len(analyzed_air_before_data), dtype='|S25')
        analyzed_air_before_data = recfunctions.append_fields(analyzed_air_before_data,
                                                              'status',
                                                              status_array,
                                                              dtypes='|S25',
                                                              usemask=False)
        analyzed_chamber_data = recfunctions.stack_arrays((analyzed_air_before_data, analyzed_chamber_data),
                                                          usemask=False)

    air_after_data = raw_chamber_data[raw_chamber_data['status'] == 'AirAfter']
    if air_after_data.size != 0:
        analyzed_air_after_data = self.analyze_data(air_after_data)
        status_array = numpy.array(['AirAfter']*len(analyzed_air_after_data), dtype='|S25')
        analyzed_air_after_data = recfunctions.append_fields(analyzed_air_after_data,
                                                             'status',
                                                             status_array,
                                                             dtypes='|S25',
                                                             usemask=False)
        analyzed_chamber_data = recfunctions.stack_arrays((analyzed_chamber_data, analyzed_air_after_data),
                                                          usemask=False)

    return analyzed_chamber_data
def load_data(data_path, branch_names, dataset_names, dataset_ranges=[]):
    """ Import data from several ROOT files to a recarray """
    l_raw_vars = []
    l_weight = []
    l_origin = []
    for i, d_name in enumerate(dataset_names):
        f_name = "{}{}.root".format(data_path, d_name)
        if "BTagCSV" in d_name:
            d_weight = 1.
        else:
            d_weight = mc_samples[d_name]["xs"]/mc_samples[d_name]["gen_events"]
        if len(dataset_ranges) == len(dataset_names):
            l_raw_vars.append(root2array(f_name, "tree", branch_names,
                                         stop=dataset_ranges[i]))
        else:
            l_raw_vars.append(root2array(f_name, "tree", branch_names))
        n_ev = l_raw_vars[-1].shape[0]
        l_weight.append(np.full((n_ev), d_weight, 'f8'))
        l_origin.append(np.full((n_ev), d_name, 'a20'))
    raw_vars = stack_arrays(l_raw_vars, asrecarray=True, usemask=False)
    weight = stack_arrays(l_weight, asrecarray=True, usemask=False)
    origin = stack_arrays(l_origin, asrecarray=True, usemask=False)
    raw_vars = append_fields(raw_vars, ["origin", "weight"], [origin, weight],
                             asrecarray=True, usemask=False)
    return raw_vars
def get_raw_chamber_data(self, filtered_data):
    # chamber_dtype = numpy.dtype([('time_secs', '<u4'),
    #                              ('time_nsecs', '<u4'),
    #                              ('time_rel', '<f4'),
    #                              ('status', '|S25'),
    #                              ('tunnel', '<u2'),
    #                              ('fly_x', '<f4'),
    #                              ('fly_y', '<f4'),
    #                              ('fly_angle', '<f4'),
    #                              ])
    header = list(FILE_TOOLS.chamber_dtype.names)
    tracking_chamber_data = filtered_data[filtered_data['status'] != 'Walk To End']
    tracking_chamber_data = tracking_chamber_data[header]
    tracking_chamber_data = tracking_chamber_data.astype(FILE_TOOLS.chamber_dtype)
    tracking_chamber_data['tunnel'] = tracking_chamber_data['tunnel'] + 1

    indices = tracking_chamber_data['status'] == 'End Chamber Ethanol'
    raw_chamber_data_ethanol = tracking_chamber_data[indices]
    raw_chamber_data_ethanol = recfunctions.drop_fields(raw_chamber_data_ethanol, 'status', usemask=False)
    status_array = numpy.array(['Ethanol']*len(raw_chamber_data_ethanol), dtype='|S25')
    raw_chamber_data_ethanol = recfunctions.append_fields(raw_chamber_data_ethanol,
                                                          'status',
                                                          status_array,
                                                          dtypes='|S25',
                                                          usemask=False)
    raw_chamber_data = raw_chamber_data_ethanol
    ethanol_start_time = raw_chamber_data_ethanol['time_rel'][0]

    indices = tracking_chamber_data['status'] == 'End Chamber Air'
    indices &= tracking_chamber_data['time_rel'] < ethanol_start_time
    raw_chamber_data_air_before = tracking_chamber_data[indices]
    raw_chamber_data_air_before = recfunctions.drop_fields(raw_chamber_data_air_before, 'status', usemask=False)
    status_array = numpy.array(['AirBefore']*len(raw_chamber_data_air_before), dtype='|S25')
    raw_chamber_data_air_before = recfunctions.append_fields(raw_chamber_data_air_before,
                                                             'status',
                                                             status_array,
                                                             dtypes='|S25',
                                                             usemask=False)
    raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data_air_before, raw_chamber_data), usemask=False)

    indices = tracking_chamber_data['status'] == 'End Chamber Air'
    indices &= tracking_chamber_data['time_rel'] > ethanol_start_time
    raw_chamber_data_air_after = tracking_chamber_data[indices]
    raw_chamber_data_air_after = recfunctions.drop_fields(raw_chamber_data_air_after, 'status', usemask=False)
    status_array = numpy.array(['AirAfter']*len(raw_chamber_data_air_after), dtype='|S25')
    raw_chamber_data_air_after = recfunctions.append_fields(raw_chamber_data_air_after,
                                                            'status',
                                                            status_array,
                                                            dtypes='|S25',
                                                            usemask=False)
    raw_chamber_data = recfunctions.stack_arrays((raw_chamber_data, raw_chamber_data_air_after), usemask=False)

    return raw_chamber_data
def test_solo(self):
    # Test stack_arrays on single arrays
    (_, x, _, _) = self.data
    test = stack_arrays((x,))
    assert_equal(test, x)
    self.assertTrue(test is x)

    test = stack_arrays(x)
    assert_equal(test, x)
    self.assertTrue(test is x)
def test_autoconversion(self):
    # Tests autoconversion
    adtype = [('A', int), ('B', bool), ('C', float)]
    a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype)
    bdtype = [('A', int), ('B', float), ('C', float)]
    b = ma.array([(4, 5, 6)], dtype=bdtype)
    control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)],
                       dtype=bdtype)
    test = stack_arrays((a, b), autoconvert=True)
    assert_equal(test, control)
    assert_equal(test.mask, control.mask)
    with assert_raises(TypeError):
        stack_arrays((a, b), autoconvert=False)
def test_unnamed_fields(self):
    # Tests combinations of arrays w/o named fields
    (_, x, y, _) = self.data

    test = stack_arrays((x, x), usemask=False)
    control = np.array([1, 2, 1, 2])
    assert_equal(test, control)

    test = stack_arrays((x, y), usemask=False)
    control = np.array([1, 2, 10, 20, 30])
    assert_equal(test, control)

    test = stack_arrays((y, x), usemask=False)
    control = np.array([10, 20, 30, 1, 2])
    assert_equal(test, control)
def produce_trial(
    analysis: Analysis,
    flux_norm: float = 0,
    random_seed: Optional[int] = None,
    n_signal_observed: Optional[int] = None,
    verbose: bool = False,
    **kwargs,
) -> np.ndarray:
    """Produces a single trial of background+signal events based on inputs.

    Args:
        analysis: The Analysis object providing the event model.
        flux_norm: A flux normalization to adjust weights.
        random_seed: A seed value for the numpy RNG.
        n_signal_observed: A fixed number of signal events to inject.
        verbose: A flag to print progress.

    Returns:
        An array of combined signal and background events.
    """
    # kwargs no-op
    len(kwargs)

    if random_seed is not None:
        np.random.seed(random_seed)

    background = analysis.model.inject_background_events()
    background['time'] = analysis.model.scramble_times(background['time'])

    if flux_norm > 0 or n_signal_observed is not None:
        signal = analysis.model.inject_signal_events(
            flux_norm,
            n_signal_observed,
        )
        signal['time'] = analysis.model.scramble_times(
            signal['time'],
            background=False,
        )
    else:
        signal = np.empty(0, dtype=background.dtype)

    if verbose:
        print(f'number of background events: {len(background)}')
        print(f'number of signal events: {len(signal)}')

    # Because we want to return the entire event and not just the
    # number of events, we need to do some numpy magic. Specifically,
    # we need to remove the fields in the simulated events that are
    # not present in the data events. These include the true direction,
    # energy, and 'oneweight'.
    signal = rf.drop_fields(
        signal,
        [n for n in signal.dtype.names if n not in background.dtype.names])

    # Combine the signal and background events and time-sort them.
    # Use recfunctions.stack_arrays to prevent numpy from scrambling entry order.
    events = rf.stack_arrays([background, signal], autoconvert=True)
    return events
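# The drop_fields/stack_arrays trick above, shown in isolation on made-up dtypes
# (a hedged sketch, not the actual event schema): simulation-only fields are
# dropped so both arrays share a dtype before stacking.
import numpy as np
import numpy.lib.recfunctions as rf

background = np.array([(1.0, 2.0)], dtype=[('time', float), ('energy', float)])
signal = np.array([(3.0, 4.0, 0.5)],
                  dtype=[('time', float), ('energy', float), ('oneweight', float)])

signal = rf.drop_fields(
    signal, [n for n in signal.dtype.names if n not in background.dtype.names])
events = rf.stack_arrays([background, signal], autoconvert=True)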
def summarize_data(self, analyzed_data):
    initialized = False
    tunnels = set(analyzed_data['tunnel'])
    for tunnel in tunnels:
        tunnel_data_analyzed = analyzed_data[analyzed_data['tunnel'] == tunnel]
        tunnel_array = numpy.ones(1, dtype=numpy.uint16)*tunnel
        tunnel_array.dtype = numpy.dtype([('tunnel', '<u2')])
        tunnel_data_summarized = tunnel_array
        delta_time = tunnel_data_analyzed['delta_time']
        total_time = delta_time.sum()
        distance = tunnel_data_analyzed['distance']
        total_distance = distance.sum()
        velocity = tunnel_data_analyzed['velocity']
        mean_velocity = velocity.mean()
        angular_velocity = tunnel_data_analyzed['angular_velocity']
        mean_angular_velocity = angular_velocity.mean()
        names = ['total_time', 'total_distance', 'mean_velocity', 'mean_angular_velocity']
        tunnel_data_seq = [total_time, total_distance, mean_velocity, mean_angular_velocity]
        tunnel_data_summarized = recfunctions.append_fields(tunnel_data_summarized,
                                                            names,
                                                            tunnel_data_seq,
                                                            dtypes=numpy.float32,
                                                            usemask=False)
        if initialized:
            summarized_data = recfunctions.stack_arrays((summarized_data, tunnel_data_summarized),
                                                        usemask=False)
        else:
            summarized_data = tunnel_data_summarized
            initialized = True
    return summarized_data
def produce_sample(self, producer_pipe, logLmin):
    """
    main loop that generates samples and puts them
    in the queue for the nested sampler object
    """
    if not self.initialised:
        self.reset()
    self.counter = 0

    while True:
        if logLmin.value == np.inf:
            break
        p = producer_pipe.recv()
        if p is None:
            break
        self.evolution_points.append(p)
        (acceptance, Nmcmc, outParam) = next(self.metropolis_hastings(logLmin.value))
        # Send the sample to the Nested Sampler
        producer_pipe.send((acceptance, Nmcmc, outParam))
        # Update the ensemble every now and again
        if (self.counter % (self.poolsize/10)) == 0 or acceptance < 1.0/float(self.poolsize):
            self.proposal.set_ensemble(self.evolution_points)
        self.counter += 1

    sys.stderr.write(
        "Sampler process {0!s}: MCMC samples accumulated = {1:d}\n".format(
            os.getpid(), len(self.samples)))
    thinning = int(np.ceil(np.mean(self.ACLs)))
    self.samples.extend(self.evolution_points)
    sys.stderr.write(
        "Sampler process {0!s}: Mean ACL measured (suggested thinning) = {1:d}\n"
        .format(os.getpid(), thinning))

    import numpy.lib.recfunctions as rfn
    self.mcmc_samples = rfn.stack_arrays(
        [self.samples[j].asnparray() for j in range(0, len(self.samples))],
        usemask=False)
    if self.verbose >= 3:
        np.savetxt(os.path.join(self.output, 'mcmc_chain_%s.dat' % os.getpid()),
                   self.mcmc_samples.ravel(),
                   header=' '.join(self.mcmc_samples.dtype.names),
                   newline='\n', delimiter=' ')
        sys.stderr.write(
            "Sampler process {0!s}: saved {1:d} mcmc samples in {2!s}\n".format(
                os.getpid(), len(self.samples), 'mcmc_chain_%s.dat' % os.getpid()))
    sys.stderr.write(
        "Sampler process {0!s} - mean acceptance {1:.3f}: exiting\n".format(
            os.getpid(), float(self.mcmc_accepted)/float(self.mcmc_counter)))
    return 0
def test_subdtype(self):
    z = np.array([
        ('A', 1), ('B', 2)
    ], dtype=[('A', '|S3'), ('B', float, (1,))])
    zz = np.array([
        ('a', [10.], 100.), ('b', [20.], 200.), ('c', [30.], 300.)
    ], dtype=[('A', '|S3'), ('B', float, (1,)), ('C', float)])

    res = stack_arrays((z, zz))
    expected = ma.array(
        data=[
            (b'A', [1.0], 0),
            (b'B', [2.0], 0),
            (b'a', [10.0], 100.0),
            (b'b', [20.0], 200.0),
            (b'c', [30.0], 300.0)],
        mask=[
            (False, [False], True),
            (False, [False], True),
            (False, [False], False),
            (False, [False], False),
            (False, [False], False)
        ],
        dtype=zz.dtype
    )
    assert_equal(res.dtype, expected.dtype)
    assert_equal(res, expected)
    assert_equal(res.mask, expected.mask)
def computeDataPointCounts():
    dataSet = getDataSet('20150129', '20150331',
                         '../../Data/Autopassdata/Singledatefiles/Dataset/raw/', 'dataset')
    dataPointCounts = np.zeros((288, 62))
    firstDate = dataSet['dateAndTime'][1]
    firstDateStr = firstDate.strftime('%Y%m%d')
    date_list = [firstDate.date() + timedelta(days=x) for x in range(0, 62)]
    interval_list = [(datetime(2015, 1, 1, 0, 0, 0) + timedelta(minutes=x)).time()
                     for x in range(0, 1440, 5)]
    interval_list.append(datetime(2015, 1, 1, 23, 59, 59).time())
    for i in range(0, len(date_list)):
        endDate = date_list[i]
        print(endDate)
        endDateStr = endDate.strftime('%Y%m%d')
        if i == 0:
            dataDateSubSet = getRowsWithinDateRange(firstDateStr, endDateStr, dataSet)
        else:
            dataDateSubSet = getRowsWithinDateRange(endDateStr, endDateStr, dataSet)
        for j in range(0, len(interval_list) - 1):
            i1 = interval_list[j]
            i2 = interval_list[j + 1]
            dataDateIntervalSubSet = getRowsWithinTimeIntervalRange(i1, i2, dataDateSubSet)
            # The original i == 0 and i > 0 branches stored the same value,
            # so no branch is needed here.
            dataPointCounts[j][i] = len(dataDateIntervalSubSet)
        print(dataPointCounts[:, i])
    # Note: passing a single ndarray to stack_arrays returns it unchanged.
    dataPointCounts = rfn.stack_arrays(dataPointCounts, usemask=False)
    np.savetxt("dataPointCountsIndividualDates.csv", dataPointCounts,
               fmt=";".join(["%f"] * 62))  # one %f per daily column
def recarray_from_pycbc_live(source, ifo=None, columns=None, nproc=1,
                             **kwargs):
    """Read a `GWRecArray` from one or more PyCBC live files
    """
    source = file_list(source)
    if nproc > 1:
        from ...io.cache import read_cache
        return read_cache(source, GWRecArray, nproc, None,
                          ifo=ifo, columns=columns,
                          format='pycbc_live', **kwargs)
    source = filter_empty_files(source, ifo=ifo)
    arrays = [recarray_from_file(x, ifo=ifo, columns=columns, **kwargs)
              for x in source]
    return recfunctions.stack_arrays(arrays, asrecarray=True, usemask=False,
                                     autoconvert=True).view(GWRecArray)
def from_rows(cls, sample_id, row_data, extra_keys=()):
    dtype = list(cls._dtype)
    if extra_keys:
        blank_kwargs = {k: [] for k in extra_keys}
        new_cna = cls(sample_id, [], [], [], [], [], **blank_kwargs)
        if 'gc' in extra_keys:
            dtype.append(cls._dtype_gc)
        if 'rmask' in extra_keys:
            dtype.append(cls._dtype_rmask)
        if 'spread' in extra_keys:
            dtype.append(cls._dtype_spread)
        if 'weight' in extra_keys:
            dtype.append(cls._dtype_weight)
        if 'probes' in extra_keys:
            dtype.append(cls._dtype_probes)
    else:
        new_cna = cls(sample_id, [], [], [], [], [])
    if len(row_data) == 1:
        row_data = [tuple(row_data[0])]
    try:
        # Rows might be plain tuples
        new_array = numpy.asarray(row_data, dtype=dtype)
    except ValueError:
        # "Setting void-array with object members using buffer"
        # All rows are numpy.ndarray
        new_array = rfn.stack_arrays(row_data, usemask=False,
                                     asrecarray=True, autoconvert=False)
    new_cna.data = new_array
    return new_cna
def __init__(self, input_files, mask_files=(), sensor_id=None, logger=None,
             selection='amp', linearity_correction=None):
    """
    Extract record array from the record arrays for all of the
    input_files.
    """
    self.sensor_id = sensor_id
    self.set_selection_function(selection)
    rec_arrays = []
    for infile in input_files:
        if logger is not None:
            logger.info("Processing %s" % infile)
        ccd = MaskedCCD(infile, mask_files=mask_files,
                        linearity_correction=linearity_correction)
        for amp in ccd:
            rec_arrays.append(get_fp_pixels(ccd, amp))
    self.rec_array = nlr.stack_arrays(rec_arrays, usemask=False,
                                      autoconvert=True, asrecarray=True)
    self.amps = sorted(ccd.keys())
def __getattr__(self, attr):
    if attr.startswith('__'):
        # we get asked for __setstate__ by copy.copy before we're
        # fully initialized. Just say no to all special names.
        raise AttributeError(attr)
    if not attr.startswith('_') and attr in getattr(self, '_array_attributes', {}):
        arr = [getattr(wave, attr) for wave in self.waves]
        if not arr:
            return np.empty(0)
        if isinstance(arr[0], vartype):
            return vartype.array(arr)
        if isinstance(arr[0], np.recarray):
            return recfunctions.stack_arrays(arr, asrecarray=True, usemask=False)
        if isinstance(arr[0], np.ndarray):
            return np.hstack(arr)
        return np.array(arr)
    if attr.startswith('mean_') and attr[5:] in getattr(self, '_mean_attributes', {}):
        values = self.__getattr__(attr[5:])
        return vartype.average(values)
    raise AttributeError('{} object does not have {} attribute'.format(
        self.__class__.__name__, attr))
def root2panda(file_paths, tree_name, **kwargs):
    '''
    Args:
    -----
        file_paths: a string like './data/*.root', or a list of such patterns
        tree_name: a string like 'Collection_Tree' corresponding to the name of the folder
                   inside the root file that we want to open
        kwargs: arguments taken by root2rec, such as branches to consider, etc
    Returns:
    --------
        output_panda: a pandas DataFrame like allbkg_df in which all the info from the root file will be stored
    Note:
    -----
        if you are working with .root files that contain different branches, you might have to mask your data;
        in that case, return pd.DataFrame(ss.data)
    '''
    if isinstance(file_paths, str):  # was `basestring` (Python 2)
        files = glob.glob(file_paths)
    else:
        files = [matched_f for f in file_paths for matched_f in glob.glob(f)]

    ss = stack_arrays([root2rec(fpath, tree_name, **kwargs) for fpath in files])
    try:
        return pd.DataFrame(ss)
    except Exception:
        return pd.DataFrame(ss.data)
def resampleMTdataAtFreq(MTdata, freqs):
    """
    Function to resample MTdata at set of frequencies
    """
    from SimPEG import MT
    # Make a rec array
    MTrec = MTdata.toRecArray().data

    # Find unique locations
    uniLoc = np.unique(MTrec[['x', 'y', 'z']])
    uniFreq = MTdata.survey.freqs
    # Get the comps
    dNames = MTrec.dtype

    # Loop over all the locations and interpolate
    for loc in uniLoc:
        # Find the index of the station (within 1 m accuracy)
        ind = np.sqrt(np.sum((rec2ndarr(MTrec[['x', 'y', 'z']]) - rec2ndarr(loc))**2, axis=1)) < 1.
        # Make a temporary recArray and interpolate all the components
        tArrRec = np.concatenate((simpeg.mkvc(freqs, 2),
                                  np.ones((len(freqs), 1))*rec2ndarr(loc),
                                  np.nan*np.ones((len(freqs), 12))), axis=1).view(dNames)
        for comp in ['zxxr', 'zxxi', 'zxyr', 'zxyi', 'zyxr', 'zyxi', 'zyyr', 'zyyi',
                     'tzxr', 'tzxi', 'tzyr', 'tzyi']:
            int1d = sciint.interp1d(MTrec[ind]['freq'], MTrec[ind][comp], bounds_error=False)
            tArrRec[comp] = simpeg.mkvc(int1d(freqs), 2)
        # Join together
        try:
            outRecArr = recFunc.stack_arrays((outRecArr, tArrRec))
        except NameError:
            outRecArr = tArrRec

    # Make the MTdata and return
    return MT.Data.fromRecArray(outRecArr)
def resampleNSEMdataAtFreq(NSEMdata, freqs):
    """
    Function to resample NSEMdata at set of frequencies
    """
    # Make a rec array
    NSEMrec = NSEMdata.toRecArray().data

    # Find unique locations
    uniLoc = np.unique(NSEMrec[['x', 'y', 'z']])
    uniFreq = NSEMdata.survey.freqs
    # Get the comps
    dNames = NSEMrec.dtype

    # Loop over all the locations and interpolate
    for loc in uniLoc:
        # Find the index of the station (within 1 m accuracy)
        ind = np.sqrt(np.sum((rec_to_ndarr(NSEMrec[['x', 'y', 'z']]) - rec_to_ndarr(loc))**2, axis=1)) < 1.
        # Make a temporary recArray and interpolate all the components
        tArrRec = np.concatenate((simpeg.mkvc(freqs, 2),
                                  np.ones((len(freqs), 1))*rec_to_ndarr(loc),
                                  np.nan*np.ones((len(freqs), 12))), axis=1).view(dNames)
        for comp in ['zxxr', 'zxxi', 'zxyr', 'zxyi', 'zyxr', 'zyxi', 'zyyr', 'zyyi',
                     'tzxr', 'tzxi', 'tzyr', 'tzyi']:
            int1d = sciint.interp1d(NSEMrec[ind]['freq'], NSEMrec[ind][comp], bounds_error=False)
            tArrRec[comp] = simpeg.mkvc(int1d(freqs), 2)
        # Join together
        try:
            outRecArr = recFunc.stack_arrays((outRecArr, tArrRec))
        except NameError:
            outRecArr = tArrRec

    # Make the NSEMdata and return
    return Data.fromRecArray(outRecArr)
def veto_all(auxiliary, segmentlist):
    """Remove events from all auxiliary channel tables based on a segmentlist

    Parameters
    ----------
    auxiliary : `dict` of `numpy.recarray`
        a `dict` of event arrays to veto
    segmentlist : `~glue.segments.segmentlist`
        the list of veto segments to use

    Returns
    -------
    survivors : `dict` of `numpy.recarray`
        a dict of the reduced arrays of events for each input channel

    See Also
    --------
    core.veto
        for details on the veto algorithm itself
    """
    channels = auxiliary.keys()
    rec = stack_arrays(auxiliary.values(), usemask=False,
                       asrecarray=True, autoconvert=True)
    keep, _ = veto(rec, segmentlist)
    return dict((c, keep[keep['channel'] == c]) for c in channels)
def main(iso_filename, XCov_filename, interpolate=True, overwrite=False):
    # FOR PARSEC ISOCHRONE (reversing it for interpolation)
    iso = ascii.read(iso_filename, header_start=13)[:114][::-1]
    # splice out rows 25-26 because of the red clump turnaround
    iso = nprf.stack_arrays((iso[:25], iso[27:]), usemask=False)

    # FOR DARTMOUTH ISOCHRONE (reversing it for interpolation)
    # iso = ascii.read(iso_filename, header_start=8)[::-1]

    # output hdf5 file
    with h5py.File(XCov_filename, mode='r+') as f:
        # feature and covariance matrices for all stars
        X = ps1_isoc_to_XCov(iso, W=mixing_matrix, interpolate=interpolate)

        if 'isochrone' in f and overwrite:
            del f['isochrone']
            logger.debug("Overwriting isochrone data")

        if 'isochrone' not in f:
            g = f.create_group('isochrone')
        else:
            g = f['isochrone']

        if 'X' not in f['isochrone']:
            g.create_dataset('X', X.shape, dtype='f', data=X)

        f.flush()
        logger.debug("Saved isochrone to {}".format(XCov_filename))
def root2pandas(files_path, tree_name, **kwargs):
    '''
    Args:
    -----
        files_path: a string like './data/*.root', for example
        tree_name: a string like 'Collection_Tree' corresponding to the name of the folder
                   inside the root file that we want to open
        kwargs: arguments taken by root2array, such as branches to consider, start, stop, step, etc
    Returns:
    --------
        output_panda: a pandas DataFrame like allbkg_df in which all the info from the root file will be stored
    Note:
    -----
        if you are working with .root files that contain different branches, you might have to mask your data;
        in that case, return pd.DataFrame(ss.data)
    '''
    # -- create list of .root files to process
    files = glob.glob(files_path)

    # -- process ntuples into rec arrays
    ss = stack_arrays([root2array(fpath, tree_name, **kwargs).view(np.recarray)
                       for fpath in files])
    try:
        return pd.DataFrame(ss)
    except Exception:
        return pd.DataFrame(ss.data)
def reduce_nearest_neighbor_batches(self, K=5, crop_pos=300, end_pos=3600,
                                    out_fname='movie.imatsh.txt'):
    """
    REDUCE nearest neighbor results from batch files
    Assumes that MAP batches consist of two media sources per batch: idx=[0,1]
    inputs:
        K - number of nearest neighbors to return [5]
        crop_pos - crop media at 0+crop_pos, END-crop_pos seconds [300]
        end_pos - position (in seconds) of last query to process [3600]
    """
    self.K = K if K is not None else self.K  # NN to keep
    end_qpos = end_pos * self.FR             # end qpos in frames
    crop_spos = int(crop_pos * self.FR)
    self.nnMerged = []
    nnMerged = self.nnMerged
    for k, nnResult in enumerate(self.nnRaw):
        if len(nnResult):
            # Note: d0/d1 are only bound when the corresponding idx rows exist.
            idx0 = nnResult['idx'] == 0
            if any(idx0):
                s0_mx = max(nnResult['spos'][where(idx0)])
                d0 = nnResult[idx0]
                d0 = d0[where((d0['spos'] > crop_spos) & (d0['spos'] < s0_mx - crop_spos))]
            idx1 = nnResult['idx'] == 1
            if any(idx1):
                s1_mx = max(nnResult['spos'][where(idx1)])
                d1 = nnResult[idx1]
                d1 = d1[where((d1['spos'] > crop_spos) & (d1['spos'] < s1_mx - crop_spos))]
            if len(d0) and len(d1):
                bar = rfn.stack_arrays([d0, d1])
            elif len(d0):
                bar = d0
            elif len(d1):
                bar = d1
            else:
                continue
            bar['dist'][where(isnan(bar['dist']))] = 2
            bar['prob'][where(isnan(bar['prob']))] = 0
            bar['amp'][where(isnan(bar['amp']))] = 0
            bar['idx'] = bar['idx'] + 2*k
            nnMerged.append(bar)
    nnMerged = rfn.stack_arrays(nnMerged)
    # Note: `x != nan` is always True; NaN distances were already replaced with 2 above.
    nnMerged = nnMerged[where(nnMerged['dist'] != nan)]
    nnMerged = np.sort(nnMerged, kind='mergesort', order=['qpos', 'dist'])
    nnMerged = [nnMerged[where(nnMerged['qpos'] == k)][:K].data
                for k in arange(0, nnMerged['qpos'].max() + 1, self.shingle_hop)]
    nnMerged = rfn.stack_arrays(nnMerged)
    nnMerged = nnMerged[where(nnMerged['qpos'] <= end_qpos)]
    self.nnMerged = nnMerged
    savetxt(out_fname, nnMerged, fmt=self.fields_str)
def importFiles(self):
    """
    Function to import EDI files into an object.
    """
    # Constants that are needed for conversion of units

    # Temp lists
    tmpStaList = []
    tmpCompList = ['freq', 'x', 'y', 'z']
    tmpCompList.extend(self.comps)

    # Make the outarray
    dtRI = [(compS.lower().replace('.', ''), float) for compS in tmpCompList]
    # Loop through all the files
    for nrEDI, EDIfile in enumerate(self.filesList):
        # Read the file into a list of the lines
        with open(EDIfile, 'r') as fid:
            EDIlines = fid.readlines()
        # Find the location
        latD, longD, elevM = _findLatLong(EDIlines)
        # Transform coordinates
        transCoord = self._transfromPoints(longD, latD)
        # Extract the name of the file (station)
        EDIname = EDIfile.split(os.sep)[-1].split('.')[0]
        # Arrange the data
        staList = [EDIname, EDIfile, transCoord[0], transCoord[1], elevM[0]]
        # Add to the station list
        tmpStaList.extend(staList)

        # Read the frequency data
        freq = _findEDIcomp('>FREQ', EDIlines)
        # Make the temporary rec array.
        tArrRec = (np.nan*np.ones((len(freq), len(dtRI)))).view(dtRI)
        # Add data to the array
        tArrRec['freq'] = mkvc(freq, 2)
        tArrRec['x'] = mkvc(np.ones((len(freq), 1))*transCoord[0], 2)
        tArrRec['y'] = mkvc(np.ones((len(freq), 1))*transCoord[1], 2)
        tArrRec['z'] = mkvc(np.ones((len(freq), 1))*elevM[0], 2)
        for comp in self.comps:
            # Deal with converting units of the impedance tensor
            if 'Z' in comp:
                unitConvert = self._impUnitEDI2SI
            else:
                unitConvert = 1
            # Rotate the data since EDI x is *north, y *east but SimPEG
            # uses x *east, y *north (* means internal reference frame)
            key = [comp.lower().replace('.', '').replace(s, t)
                   for s, t in [['xx', 'yy'], ['xy', 'yx'], ['yx', 'xy'], ['yy', 'xx']]
                   if s in comp.lower()][0]
            tArrRec[key] = mkvc(unitConvert*_findEDIcomp('>' + comp, EDIlines), 2)
        # Make a masked array
        mArrRec = np.ma.MaskedArray(rec2ndarr(tArrRec),
                                    mask=np.isnan(rec2ndarr(tArrRec))).view(dtype=tArrRec.dtype)
        try:
            outTemp = recFunc.stack_arrays((outTemp, mArrRec))
        except NameError:
            outTemp = mArrRec

    # Assign the data
    self._data = outTemp
def stack(self, r, defaults=None):
    """ Superposes arrays fields by fields """
    self.data = recfunctions.stack_arrays([self.data, r], defaults,
                                          usemask=False, asrecarray=True)
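# Sketch of what the `defaults` argument forwarded above does (hypothetical field
# names): it supplies per-field fill values for fields missing from one of the
# stacked arrays, instead of numpy's generic fill value.
import numpy as np
from numpy.lib.recfunctions import stack_arrays

a = np.array([(1, 10.0)], dtype=[('id', int), ('value', float)])
b = np.array([(2,)], dtype=[('id', int)])  # has no 'value' field

merged = stack_arrays([a, b], defaults={'value': -999.0},
                      usemask=False, asrecarray=True)
# merged['value'] -> array([ 10., -999.])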
def get_events_one_type(kwe_file, ev_type, ev_names=None, rec=None):
    if not ev_names:  # was `ev_names=[]` (mutable default argument)
        ev_names = list_events(kwe_file, ev_type)
    ev_stack = [get_events_one_name(kwe_file, ev_type, ev_name, rec=rec)
                for ev_name in ev_names]
    return rf.stack_arrays(ev_stack, asrecarray=True, usemask=False)
def _get_t_isochrones(self, logtmin, logtmax, dlogt, Z=0.0152):
    """ Generate a proper table directly from the PADOVA website

    Parameters
    ----------
    logtmin: float
        log-age min (age in yr)

    logtmax: float
        log-age max (age in yr)

    dlogt: float
        log-age step to request

    Z: float or sequence
        single value or list of values of metallicity Z

    Returns
    -------
    tab: eztable.Table
        the table of isochrones
    """
    if not hasattr(Z, "__iter__"):
        iso_table = parsec.get_t_isochrones(max(6.0, logtmin), min(10.13, logtmax),
                                            dlogt, Z, model=self.modeltype)
        iso_table.header["NAME"] = "PadovaCMD Isochrones: " + self.modeltype
        if "Z" not in iso_table:
            iso_table.add_column("Z", np.ones(iso_table.nrows) * Z)

        # rename cols, remove phot and other unnecessary cols
        iso_table = self._clean_cols(iso_table)

        # filter iso data: pre-ms and bad points
        iso_table = self._filter_iso_points(iso_table,
                                            filterPMS=self.filterPMS,
                                            filterBad=self.filterBad)
    else:
        iso_table = self._get_t_isochrones(logtmin, logtmax, dlogt, Z[0])
        iso_table.header["NAME"] = "PadovaCMD Isochrones: " + self.modeltype
        if len(Z) > 1:
            more = [self._get_t_isochrones(logtmin, logtmax, dlogt, Zk).data
                    for Zk in Z[1:]]
            iso_table.data = recfunctions.stack_arrays([iso_table.data] + more,
                                                       usemask=False,
                                                       asrecarray=True)
    return iso_table
def read(filelist):
    data = []
    for f in sorted(filelist):
        x = np.load(f)
        if len(data) == 0:
            data = x.copy()
        else:
            data = rf.stack_arrays([data, x])
    return data
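# Stacking inside the loop copies the accumulated array on every iteration
# (quadratic in total size). A hedged equivalent sketch that collects the chunks
# and stacks once; note that with a single file it returns the loaded array
# itself rather than a copy.
import numpy as np
import numpy.lib.recfunctions as rf

def read_once(filelist):
    chunks = [np.load(f) for f in sorted(filelist)]
    if not chunks:
        return []
    return rf.stack_arrays(chunks)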
def combine_healpix_files(folder):
    file_names = glob.glob1(folder, '*')
    array_list = []
    for file in file_names:
        # glob.glob1 returns bare file names, so `folder` must end with a separator
        array_list.append(np.load(folder + file))
    result = stack_arrays(array_list, usemask=False, autoconvert=True)
    print('Length of the stacked download:', len(result), ' And the dtype:', result.dtype)
    return result
def find_and_measure_peaks(data, peak_flux_list=None, use_flux_con=True, ignore_defects=True,
                           window_size=11, sigma=5, p=0.5, percentile=10):
    global ts
    if peak_flux_list is None:
        peak_flux_list = []
    ts = mark_time()
    found_peaks, found_inds = real_find_peaks(data, window_size=window_size, p=p,
                                              sigma=sigma, percentile=percentile)
    ts = mark_time('real_find_peaks', ts)
    removed = False
    min_wavelength = np.ma.min(data['wavelength'])
    max_wavelength = np.ma.max(data['wavelength'])

    for candidate_peak, candidate_ind in zip(found_peaks, found_inds):
        removed = False
        if candidate_peak is np.ma.masked:
            continue
        if candidate_peak > max_wavelength or candidate_peak < min_wavelength:
            continue
        for peak in peak_flux_list:
            if (candidate_peak > peak['wavelength_lower_bound'] and
                    candidate_peak < peak['wavelength_upper_bound'] and
                    np.abs(candidate_ind - peak['index_lower_bound']) >= max_peak_width and
                    np.abs(candidate_ind - peak['index_upper_bound']) >= max_peak_width):
                removed = True
                break
        # Note: the original tested `if ~removed:`, but `~True == -2` is truthy,
        # so that branch was always taken; `not removed` is the intended logic.
        if not removed:
            target_flux_totals = get_total_flux("UNKNOWN", data['wavelength'], data['flux'],
                                                None if not use_flux_con else data['con_flux'],
                                                candidate_peak, ignore_defects=ignore_defects)
            peak_flux_list.append(target_flux_totals)
    ts = mark_time('flux loop', ts)

    # Now, need to prune the list
    arr = rfn.stack_arrays(peak_flux_list)
    ts = mark_time('stack_arrays', ts)
    peak_flux = Table(data=arr)
    ts = mark_time('create table', ts)
    peak_flux.remove_rows(np.abs(peak_flux['peak_delta']) > max_peak_width)
    peak_flux = filter_for_overlaps(peak_flux, ['index_lower_bound', 'index_upper_bound'])
    peak_flux = filter_for_overlaps(peak_flux, ['index_lower_bound'])
    peak_flux = filter_for_overlaps(peak_flux, ['index_upper_bound'])
    ts = mark_time('filter_for_overlaps', ts)
    return peak_flux
def read_array_info(entry):
    data = try_read(files.read_array_info, "array_info", entry.array_info)
    info = recfunctions.stack_arrays([
        build_detname(data.info.det_uid, entry),
        recfunctions.drop_fields(data.info, "det_uid"),
    ])
    return dataset.DataSet([
        dataset.DataField("array_info", data),
        dataset.DataField("entry", entry)])
def get_filaments(array, id_name):
    """Calculate the size and members of each filament"""
    filaments = []
    current_id = array[id_name][0]
    current_filament = []
    for entry in array:
        if entry[id_name] != current_id:
            current_id = entry[id_name]
            filaments.append(np.atleast_1d(nlr.stack_arrays(current_filament)))
            current_filament = [entry]
        else:
            current_filament.append(entry)
    filaments.append(np.atleast_1d(nlr.stack_arrays(current_filament)))
    return np.array(filaments)
def calculate_fret(acc_locs, don_locs):
    """
    Calculate the FRET efficiency in picked regions; this is for one trace
    """
    fret_dict = {}
    if len(acc_locs) == 0:
        max_frames = _np.max(don_locs['frame'])
    elif len(don_locs) == 0:
        max_frames = _np.max(acc_locs['frame'])
    else:
        max_frames = _np.max([_np.max(acc_locs['frame']), _np.max(don_locs['frame'])])

    # Initialize a vector filled with zeros for the duration of the movie
    xvec = _np.arange(max_frames + 1)
    yvec = xvec[:] * 0
    acc_trace = yvec.copy()
    don_trace = yvec.copy()
    # Fill vector with the photon numbers of events that happened
    acc_trace[acc_locs['frame']] = acc_locs['photons'] - acc_locs['bg']
    don_trace[don_locs['frame']] = don_locs['photons'] - don_locs['bg']

    # Calculate the FRET efficiency
    fret_trace = acc_trace / (acc_trace + don_trace)
    # Only select FRET values between 0 and 1
    selector = _np.logical_and(fret_trace > 0, fret_trace < 1)

    # Select the final FRET events based on the 0 to 1 range
    fret_events = fret_trace[selector]
    fret_timepoints = _np.arange(len(fret_trace))[selector]

    # Calculate FRET localizations: select the localizations when FRET happens
    sel_locs = []
    for element in fret_timepoints:
        sel_locs.append(don_locs[don_locs['frame'] == element])
    fret_locs = stack_arrays(sel_locs, asrecarray=True, usemask=False)
    fret_locs = _lib.append_to_rec(fret_locs, _np.array(fret_events), 'fret')

    fret_dict['fret_events'] = _np.array(fret_events)
    fret_dict['fret_timepoints'] = fret_timepoints
    fret_dict['acc_trace'] = acc_trace
    fret_dict['don_trace'] = don_trace
    fret_dict['frames'] = xvec
    fret_dict['maxframes'] = max_frames

    return fret_dict, fret_locs
def list_sess_units(bird, sess, sorted=False):
    # Note: the `sorted` parameter shadows the builtin and is not forwarded below;
    # list_shank_units is always called with sorted=False.
    shanks = et.get_shanks_list(bird, sess)
    sess_units = None
    for shank in shanks:
        shank_units = list_shank_units(bird, sess, shank, sorted=False)
        if sess_units is None:
            sess_units = shank_units
        else:
            sess_units = rfn.stack_arrays((sess_units, shank_units))
    return sess_units
def root2pandas(files, tree_name, **kwargs):
    # -- process ntuples into rec arrays
    ss = stack_arrays([
        root2array(fpath, tree_name, **kwargs).view(numpy.recarray)
        for fpath in files
    ])
    try:
        return pandas.DataFrame(ss)
    except Exception:
        return pandas.DataFrame(ss.data)
def summarize_chamber_data(self, analyzed_chamber_data):
    summarized_total_data = self.summarize_data(analyzed_chamber_data)
    status_array = numpy.array(['Total']*len(summarized_total_data), dtype='|S25')
    summarized_chamber_data = recfunctions.append_fields(summarized_total_data,
                                                         'status',
                                                         status_array,
                                                         dtypes='|S25',
                                                         usemask=False)

    air_before_data = analyzed_chamber_data[analyzed_chamber_data['status'] == 'AirBefore']
    if air_before_data.size != 0:
        summarized_air_before_data = self.summarize_data(air_before_data)
        status_array = numpy.array(['AirBefore']*len(summarized_air_before_data), dtype='|S25')
        summarized_air_before_data = recfunctions.append_fields(summarized_air_before_data,
                                                                'status',
                                                                status_array,
                                                                dtypes='|S25',
                                                                usemask=False)
        summarized_chamber_data = recfunctions.stack_arrays((summarized_chamber_data, summarized_air_before_data),
                                                            usemask=False)

    ethanol_data = analyzed_chamber_data[analyzed_chamber_data['status'] == 'Ethanol']
    summarized_ethanol_data = self.summarize_data(ethanol_data)
    status_array = numpy.array(['Ethanol']*len(summarized_ethanol_data), dtype='|S25')
    summarized_ethanol_data = recfunctions.append_fields(summarized_ethanol_data,
                                                         'status',
                                                         status_array,
                                                         dtypes='|S25',
                                                         usemask=False)
    summarized_chamber_data = recfunctions.stack_arrays((summarized_chamber_data, summarized_ethanol_data),
                                                        usemask=False)

    air_after_data = analyzed_chamber_data[analyzed_chamber_data['status'] == 'AirAfter']
    if air_after_data.size != 0:
        summarized_air_after_data = self.summarize_data(air_after_data)
        status_array = numpy.array(['AirAfter']*len(summarized_air_after_data), dtype='|S25')
        summarized_air_after_data = recfunctions.append_fields(summarized_air_after_data,
                                                               'status',
                                                               status_array,
                                                               dtypes='|S25',
                                                               usemask=False)
        summarized_chamber_data = recfunctions.stack_arrays((summarized_chamber_data, summarized_air_after_data),
                                                            usemask=False)

    return summarized_chamber_data
def toRecArray(self, returnType='RealImag'):
    '''
    Function that returns a numpy.recarray for a SimpegMT impedance data object.

    :param str returnType: Switches between returning a rec array where the impedance
        is split to real and imaginary ('RealImag') or is a complex ('Complex')
    '''
    # Define the record fields
    dtRI = [('freq', float), ('x', float), ('y', float), ('z', float),
            ('zxxr', float), ('zxxi', float), ('zxyr', float), ('zxyi', float),
            ('zyxr', float), ('zyxi', float), ('zyyr', float), ('zyyi', float),
            ('tzxr', float), ('tzxi', float), ('tzyr', float), ('tzyi', float)]
    dtCP = [('freq', float), ('x', float), ('y', float), ('z', float),
            ('zxx', complex), ('zxy', complex), ('zyx', complex), ('zyy', complex),
            ('tzx', complex), ('tzy', complex)]
    impList = ['zxxr', 'zxxi', 'zxyr', 'zxyi', 'zyxr', 'zyxi', 'zyyr', 'zyyi']

    for src in self.survey.srcList:
        # Temp array for all the receivers of the source.
        # Note: needs to be written more generally, using different rxTypes
        # and not all the data at the locations. Assume the same locs for all RX.
        locs = src.rxList[0].locs
        if locs.shape[1] == 1:
            locs = np.hstack((np.array([[0.0, 0.0]]), locs))
        elif locs.shape[1] == 2:
            locs = np.hstack((np.array([[0.0]]), locs))
        tArrRec = np.concatenate((src.freq*np.ones((locs.shape[0], 1)),
                                  locs,
                                  np.nan*np.ones((locs.shape[0], 12))), axis=1).view(dtRI)
        # Get the type and the value for the DataMT object as a list
        typeList = [[rx.rxType.replace('z1d', 'zyx'), self[src, rx]] for rx in src.rxList]
        # Insert the values to the temp array
        for nr, (key, val) in enumerate(typeList):
            tArrRec[key] = mkvc(val, 2)
        # Masked array
        mArrRec = np.ma.MaskedArray(rec2ndarr(tArrRec),
                                    mask=np.isnan(rec2ndarr(tArrRec))).view(dtype=tArrRec.dtype)
        # Unique freq and loc of the masked array
        uniFLmarr = np.unique(mArrRec[['freq', 'x', 'y', 'z']]).copy()
        try:
            outTemp = recFunc.stack_arrays((outTemp, mArrRec))
        except NameError:
            outTemp = mArrRec

    if 'RealImag' in returnType:
        outArr = outTemp
    elif 'Complex' in returnType:
        # Add the real and imaginary to a complex number
        outArr = np.empty(outTemp.shape, dtype=dtCP)
        for comp in ['freq', 'x', 'y', 'z']:
            outArr[comp] = outTemp[comp].copy()
        for comp in ['zxx', 'zxy', 'zyx', 'zyy', 'tzx', 'tzy']:
            outArr[comp] = outTemp[comp + 'r'].copy() + 1j*outTemp[comp + 'i'].copy()
    else:
        raise NotImplementedError(
            '{:s} is not implemented; returnType has to be RealImag or Complex.'.format(returnType))

    # Return
    return outArr
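# The real/imag -> complex repacking at the end of toRecArray, in isolation on a
# toy dtype (field names chosen to mirror one impedance component; a sketch, not
# the SimPEG API).
import numpy as np

rt = np.array([(1.0, 2.0)], dtype=[('zxyr', float), ('zxyi', float)])
ct = np.empty(rt.shape, dtype=[('zxy', complex)])
ct['zxy'] = rt['zxyr'] + 1j * rt['zxyi']  # ct['zxy'] -> [1.+2.j]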
def combine_data(data_all, data_new, dict):
    """
    combine_data reads in the data from a series of results_files
    and includes the loc_IDs

    Inputs:  results_file
    Output:  data array
    """
    # (note: the `dict` parameter shadows the builtin)
    #==========================================================================
    import numpy as np
    import numpy.lib.recfunctions as rfn
    #==========================================================================
    print(' Combining data')

    for key, value in dict.items():
        # Define a new column that is the same value for everyone in the
        # new data array
        col = np.array(value)
        col = np.repeat(col, len(data_new))

        # Add this column to the data_new recarray
        if key == 'b0_order':
            data_new = rfn.append_fields(data_new, key, col, usemask=False,
                                         asrecarray=True, dtypes='S100')
        else:
            data_new = rfn.append_fields(data_new, key, col, usemask=False,
                                         asrecarray=True)

    # If data_all exists, then join data_new to the end of it
    if data_all is not None:  # was `if not data_all == None:`
        data_all = rfn.stack_arrays((data_all, data_new), usemask=False,
                                    asrecarray=True)
    # If data_all doesn't yet exist then data_new becomes data_all
    else:
        data_all = data_new

    data_all.sort(order='loc_id')

    return data_all
def test_checktitles(self):
    # Test using titles in the field names
    adtype = [(('a', 'A'), int), (('b', 'B'), bool), (('c', 'C'), float)]
    a = ma.array([(1, 2, 3)], mask=[(0, 1, 0)], dtype=adtype)
    bdtype = [(('a', 'A'), int), (('b', 'B'), bool), (('c', 'C'), float)]
    b = ma.array([(4, 5, 6)], dtype=bdtype)
    test = stack_arrays((a, b))
    control = ma.array([(1, 2, 3), (4, 5, 6)], mask=[(0, 1, 0), (0, 0, 0)],
                       dtype=bdtype)
    assert_equal(test, control)
    assert_equal(test.mask, control.mask)
def saveDataSet(directory, filename, dataSet, format, header):
    nRows = dataSet.shape[0]
    firstDate = dataSet['dateAndTime'][0]
    lastDate = dataSet['dateAndTime'][nRows - 1]
    nDays = (lastDate - firstDate).days
    dates = [firstDate + timedelta(x) for x in range(0, nDays + 1)]
    for date in dates:
        dateStr = date.strftime('%Y%m%d')
        filenameStr = dateStr + filename
        rowsOnDate = [dataSet[i] for i in range(0, nRows)
                      if dataSet['dateAndTime'][i].date() == date.date()]
        rowsOnDate = rfn.stack_arrays(rowsOnDate, usemask=False)
        np.savetxt(join(directory, filenameStr), rowsOnDate, fmt=format,
                   header=header, comments='')