def simulate(cls, sample_obj, color=None, **parameter):
    """
    Build a simulated anisotropy measurement from keyword parameters.

    Recognised entries of ``parameter`` (all optional):

    mdirs:
       measurement directions as a sequence of (D, I) pairs,
       default [[0.0, 0.0], [90.0, 0.0], [0.0, 90.0]]
    evals:
       the three eigenvalues handed to ``Anisotropy.createDiagonalTensor``,
       default [1.0, 1.0, 1.0]
    measerr:
       scale of the random error added to each component, default 0

    All entries of ``parameter`` are also forwarded to ``cls``.

    Raises
    ------
       RuntimeError
          if ``evals`` does not contain exactly three values
    """
    directions = parameter.get('mdirs', [[0.0, 0.0], [90.0, 0.0], [0.0, 90.0]])

    eigenvalues = list(parameter.get('evals', [1.0, 1.0, 1.0]))
    if len(eigenvalues) != 3:
        raise RuntimeError('got %d eigenvalues instead of 3' % len(eigenvalues))

    max_error = parameter.get('measerr', 0)

    # todo: normalize evals to 1?
    tensor = Anisotropy.createDiagonalTensor(*eigenvalues)

    # todo: also implement 1D measurement
    table = RockPyData(column_names=['d', 'i', 'x', 'y', 'z'])

    for direction in directions:
        # one error term per component; lies in [-measerr, measerr)
        # assuming random() returns values in [0, 1) -- TODO confirm
        noise = [max_error * random() * 2 - max_error for _ in range(3)]
        # M = R * H
        field = DIL2XYZ((direction[0], direction[1], 1))
        magnetization = np.dot(tensor, field) + noise
        row = np.hstack([np.array(direction), magnetization])
        table = table.append_rows(row)

    table.define_alias('variable', ('d', 'i'))

    return cls(sample_obj, 'anisotropy',
               mfile=None,
               mdata={'data': table},
               machine='simulation',
               color=color,
               **parameter)
def calc_all_mean_results(self, filtered=False, **parameter):
    """
    Calculate mean and standard deviation of all results, one row for
    every (mtype, stype, sval) combination known to this object.

    Parameters
    ----------
       filtered: bool
          passed through to ``all_results``
       parameter:
          further keyword arguments passed through to ``all_results``

    Returns
    -------
       RockPyData or None
          the collected mean rows; None if there is no
          (mtype, stype, sval) combination. The same object is stored in
          ``self._mean_results``.
    """
    out = None

    for mtype in self.mtypes:
        for stype in self.mtype_stype_dict[mtype]:
            for sval in self.stype_sval_dict[stype]:
                results = self.all_results(mtype=mtype, stype=stype, sval=sval,
                                           filtered=filtered, **parameter)
                results.define_alias('variable', ['stype ' + stype])

                values = results.v
                mean = np.mean(values, axis=0)
                err = np.std(values, axis=0)

                row = RockPyData(column_names=results.column_names, data=mean)
                row.e = err.reshape(1, len(err))

                # Compare against the None sentinel explicitly: the
                # truthiness of a data container is not a reliable
                # "nothing collected yet" test.
                if out is None:
                    out = row
                else:
                    out = out.append_rows(data=row.data)

    self._mean_results = out
    return out
def test_add_errors(self):
    """Errors assigned through ``e`` can be read back per column."""
    # Original author's note: assigning to a column of a table that has no
    # rows yet (e.g. d['A'].v = 1) fails with an AttributeError on None;
    # maybe initialize to np.nan? Hence a row is appended first.
    table = RockPyData(column_names=['A', 'B'])
    table = table.append_rows([1, 2])

    table.e = [[4, 5]]

    error_b = table['B'].e
    self.assertEqual(5., error_b)
def format_ani(self):
    """
    Convert the machine data into a RockPyData table and store it in
    ``self._data['data']``.

    The measurement directions (``machine_data.mdirs``) become the columns
    'd' and 'i'. Depending on the number of measured values per direction
    the remaining columns are 'm' (scalar, one value per direction) or
    'x', 'y', 'z' (vectorial, three values per direction).

    If the number of values is neither one nor three per direction, an
    error is logged and ``self._data`` is left untouched.
    """
    self.header = self.machine_data.header
    mdirs = self.machine_data.mdirs
    measurements = self.machine_data.data

    n_dirs = len(mdirs)
    n_values = len(measurements.flatten())

    # do we have scalar or vectorial measurements?
    # Compared by multiplication instead of division: integer division
    # would accept e.g. 7 values for 2 directions, and an empty direction
    # list would raise ZeroDivisionError.
    if n_values == n_dirs:  # scalar
        column_names = ['d', 'i', 'm']
    elif n_values == 3 * n_dirs:  # vectorial
        column_names = ['d', 'i', 'x', 'y', 'z']
    else:
        Anisotropy.logger.error(
            "anisotropy measurements have %d values for %d directions, "
            "expected 1 or 3 components per direction" % (n_values, n_dirs))
        return

    data = RockPyData(column_names=column_names)
    for idx, mdir in enumerate(mdirs):
        data = data.append_rows(np.hstack([np.array(mdir), measurements[idx]]))

    data.define_alias('variable', ('d', 'i'))
    self._data['data'] = data
def format_ani(self):
    """
    Convert the machine data into a RockPyData table and store it in
    ``self._data['data']``.

    The measurement directions (``machine_data.mdirs``) become the columns
    'd' and 'i'. Depending on the number of measured values per direction
    the remaining columns are 'm' (scalar, one value per direction) or
    'x', 'y', 'z' (vectorial, three values per direction).

    If the number of values is neither one nor three per direction, an
    error is logged and ``self._data`` is left untouched.
    """
    self.header = self.machine_data.header
    mdirs = self.machine_data.mdirs
    measurements = self.machine_data.data

    n_dirs = len(mdirs)
    n_values = len(measurements.flatten())

    # do we have scalar or vectorial measurements?
    # Compared by multiplication instead of division: integer division
    # would accept e.g. 7 values for 2 directions, and an empty direction
    # list would raise ZeroDivisionError.
    if n_values == n_dirs:  # scalar
        column_names = ['d', 'i', 'm']
    elif n_values == 3 * n_dirs:  # vectorial
        column_names = ['d', 'i', 'x', 'y', 'z']
    else:
        Anisotropy.logger.error(
            "anisotropy measurements have %d values for %d directions, "
            "expected 1 or 3 components per direction" % (n_values, n_dirs))
        return

    data = RockPyData(column_names=column_names)
    for idx, mdir in enumerate(mdirs):
        data = data.append_rows(np.hstack([np.array(mdir), measurements[idx]]))

    data.define_alias('variable', ('d', 'i'))
    self._data['data'] = data
def simulate(cls, sample_obj, color=None, **parameter):
    """
    Build a simulated anisotropy measurement from keyword parameters.

    Recognised entries of ``parameter`` (all optional):

    mdirs:
       measurement directions as a sequence of (D, I) pairs,
       default [[0.0, 0.0], [90.0, 0.0], [0.0, 90.0]]
    evals:
       the three eigenvalues handed to ``Anisotropy.createDiagonalTensor``,
       default [1.0, 1.0, 1.0]
    measerr:
       scale of the random error added to each component, default 0

    All entries of ``parameter`` are also forwarded to ``cls``.

    Raises
    ------
       RuntimeError
          if ``evals`` does not contain exactly three values
    """
    directions = parameter.get('mdirs', [[0.0, 0.0], [90.0, 0.0], [0.0, 90.0]])

    eigenvalues = list(parameter.get('evals', [1.0, 1.0, 1.0]))
    if len(eigenvalues) != 3:
        raise RuntimeError('got %d eigenvalues instead of 3' % len(eigenvalues))

    max_error = parameter.get('measerr', 0)

    # todo: normalize evals to 1?
    tensor = Anisotropy.createDiagonalTensor(*eigenvalues)

    # todo: also implement 1D measurement
    table = RockPyData(column_names=['d', 'i', 'x', 'y', 'z'])

    for direction in directions:
        # one error term per component; lies in [-measerr, measerr)
        # assuming random() returns values in [0, 1) -- TODO confirm
        noise = [max_error * random() * 2 - max_error for _ in range(3)]
        # M = R * H
        field = DIL2XYZ((direction[0], direction[1], 1))
        magnetization = np.dot(tensor, field) + noise
        row = np.hstack([np.array(direction), magnetization])
        table = table.append_rows(row)

    table.define_alias('variable', ('d', 'i'))

    return cls(sample_obj, 'anisotropy',
               mfile=None,
               mdata={'data': table},
               machine='simulation',
               color=color,
               **parameter)
class TestRockPyData(TestCase):
    """Unit tests for RockPyData, run against a table of 4 rows x 4 columns."""

    def setUp(self):
        # executed before every test: build a fresh fixture table
        self.testdata = ((1, 2, 3, 4),
                         (1, 6, 7, 8),
                         (1, 2, 11, 12),
                         (1, 6, 55, 66))
        self.col_names = ('F', 'Mx', 'My', 'Mz')
        self.row_names = ('1.Zeile', '2.Zeile_A', '3.Zeile', '4.Zeile_A')
        self.units = ('T', 'mT', 'fT', 'pT')

        self.RPD = RockPyData(column_names=self.col_names,
                              row_names=self.row_names,
                              units=self.units,
                              data=self.testdata)

    def test_column_names(self):
        expected = list(self.col_names)
        self.assertEqual(self.RPD.column_names, expected)

    def test_column_count(self):
        expected = len(self.col_names)
        self.assertEqual(self.RPD.column_count, expected)

    def test__find_duplicate_variable_rows(self):
        # with the initial variable alias all four rows are duplicates
        # self.assertTrue((self.RPD._find_duplicate_variables()[0] == np.array([0, 1, 2])).all())
        duplicates = self.RPD._find_duplicate_variable_rows()
        self.assertEqual(duplicates, [(0, 1, 2, 3)])

        # redefine the variable alias to the first two columns
        self.RPD.define_alias('variable', ('F', 'Mx'))
        duplicates = self.RPD._find_duplicate_variable_rows()
        self.assertEqual(duplicates, [(0, 2), (1, 3)])

    def test_rename_column(self):
        self.RPD.rename_column('Mx', 'M_x')
        self.assertEqual(self.RPD.column_names, ['F', 'M_x', 'My', 'Mz'])

    def test_append_rows(self):
        # two rows at once
        two_rows = [[5, 6, 7, 8], [9, 10, 11, 12]]
        self.RPD = self.RPD.append_rows(two_rows, ('5.Zeile', '6.Zeile'))
        tail = self.RPD.v[-2:, :]
        self.assertTrue(np.array_equal(tail, np.array(two_rows)))

        # a single row
        one_row = [5, 6, 7, 8]
        self.RPD = self.RPD.append_rows(one_row, '5.Zeile')
        last = self.RPD.v[-1, :]
        self.assertTrue(np.array_equal(last, np.array(one_row)))

        # another RockPyData object with a differing column name
        other = copy.deepcopy(self.RPD)
        other.rename_column('Mx', 'M_x')
        self.RPD = self.RPD.append_rows(other)
        # TODO: add assert

    def test_delete_rows(self):
        self.RPD = self.RPD.delete_rows((0, 2))
        remaining = np.array(self.testdata)[(1, 3), :]
        self.assertTrue(np.array_equal(self.RPD.v, remaining))

    def test_eliminate_duplicate_variable_rows(self):
        # check for one variable column
        self.RPD = self.RPD.eliminate_duplicate_variable_rows()
        empty = np.array([]).reshape(0, 4)
        self.assertTrue(np.array_equal(self.RPD.v, empty))

    def test_eliminate_duplicate_variable_rows2(self):
        # check for two variable columns
        self.RPD.define_alias('variable', ('F', 'Mx'))

        averaged = self.RPD.eliminate_duplicate_variable_rows(substfunc='mean')
        mean_values = np.array([[1., 2., 7., 8.], [1., 6., 31., 37.]])
        mean_errors = np.array([[0., 0., 4., 4.], [0., 0., 24., 29.]])
        self.assertTrue(np.array_equal(averaged.v, mean_values))
        self.assertTrue(np.array_equal(averaged.e, mean_errors))

        latest = self.RPD.eliminate_duplicate_variable_rows(substfunc='last')
        last_values = np.array([[1., 2., 11., 12.], [1., 6., 55., 66.]])
        self.assertTrue(np.array_equal(latest.v, last_values))

    def test_mean(self):
        self.RPD = self.RPD.mean()
        values = np.array([[1., 4., 19., 22.5]])
        errors = np.array([[0., 2., 20.976, 25.273]])
        self.assertTrue(np.array_equal(self.RPD.v, values))
        np.testing.assert_allclose(self.RPD.e, errors, atol=0.01)

    def test_max(self):
        self.RPD = self.RPD.max()
        maxima = np.array([[1., 6., 55., 66.]])
        self.assertTrue(np.array_equal(self.RPD.v, maxima))

    def test_filter_row_names(self):
        selected = self.RPD.filter_row_names(('1.Zeile', '3.Zeile'))
        self.assertEqual(selected.row_names, ['1.Zeile', '3.Zeile'])

    def test_filter_match_row_names(self):
        # get all rows ending with '_A'
        selected = self.RPD.filter_match_row_names('.*_A')
        self.assertEqual(selected.row_names, ['2.Zeile_A', '4.Zeile_A'])

    def test_append_columns(self):
        count_before = self.RPD.column_count
        new_column = (8, 7, 6, 5)
        self.RPD = self.RPD.append_columns('neue Spalte', new_column)
        self.assertEqual(count_before + 1, self.RPD.column_count)
        stored = self.RPD['neue Spalte'].v
        self.assertTrue(np.array_equal(stored, np.array(new_column)))

    def test_sort(self):
        sorted_mx = self.RPD.sort('Mx')['Mx'].v
        self.assertTrue(np.array_equal(sorted_mx, np.array((2, 2, 6, 6))))

    # disabled test, kept for reference
    #def test_interpolate(self):
    #    self.RPD.define_alias('variable', 'My')
    #    iv = (1, 11, 33, 55, 100)
    #    self.assertTrue(np.array_equal((self.RPD.interpolate(iv))['My'].v, np.array(iv)))
    #    self.assertTrue(np.array_equal((self.RPD.interpolate(iv))['Mx'].v[1:-1], np.array([2., 4., 6.])))

    def test_magnitude(self):
        self.RPD.define_alias('m', ('Mx', 'My', 'Mz'))
        magnitudes = self.RPD.magnitude('m')
        self.RPD = self.RPD.append_columns('mag', magnitudes)
        expected = np.array([5.38516481, 12.20655562, 16.40121947, 86.12200648])
        np.testing.assert_allclose(self.RPD['mag'].v, expected, atol=1e-5)

    def test_column_names_to_indices(self):
        indices = self.RPD.column_names_to_indices(('Mx', 'Mz'))
        self.assertEqual(indices, [1, 3])

    def test_interation(self):
        # TODO: add proper assertion
        # for now only checks that iterating over the table does not raise
        for _row in self.RPD:
            pass

    def test_add_errors(self):
        # Original author's note: assigning to a column of a table without
        # rows (e.g. d['A'].v = 1) fails with an AttributeError on None;
        # maybe initialize to np.nan? Hence a row is appended first.
        table = RockPyData(column_names=['A', 'B'])
        table = table.append_rows([1, 2])
        table.e = [[4, 5]]
        error_b = table['B'].e
        self.assertEqual(5., error_b)

    def test_data_assignment(self):
        # no assertions: the table is printed after each assignment
        print(self.RPD)

        # set only values
        self.RPD['Mx'] = [1.1, 1.2, 1.3, 1.4]
        print(self.RPD)

        # set values and errors
        self.RPD['Mx'] = [[[1.1, 0.11]], [[1.2, 0.12]], [[1.3, 0.13]], [[1.4, 0.14]]]
        print(self.RPD)
class SampleGroup(object):
    """
    Container for Samples, has special calculation methods.

    Samples are stored in ``self.samples`` as {name: sample}. In addition an
    info dictionary (``self.info_dict``) maps every permutation of
    'mtype', 'stype' and 'sval' to the samples carrying that combination.
    """
    log = logging.getLogger(__name__)
    count = 0  # number of SampleGroup instances created so far

    def __init__(self, name=None, sample_list=None, sample_file=None, **options):
        """
        Parameters
        ----------
           name: str
              name of the group, defaults to 'SampleGroup NNNN' with the
              running instance count
           sample_list: single item or list
              samples added through ``add_samples``
           sample_file: str
              path handed to ``import_multiple_samples``
           options:
              passed through to ``import_multiple_samples``
        """
        SampleGroup.count += 1
        SampleGroup.log.info('CRATING new << samplegroup >>')

        # ## initialize
        if name is None:
            name = 'SampleGroup %04i' % (self.count)

        self.name = name
        self.samples = {}
        self.results = None
        self.color = None

        if sample_file:
            self.import_multiple_samples(sample_file, **options)

        self._info_dict = self.__create_info_dict()

        if sample_list:
            self.add_samples(sample_list)

    def __getstate__(self):
        """
        Returns the dict that will be pickled: only name, samples and results.
        """
        pickled = ('name', 'samples', 'results')
        # items() instead of iteritems(): works on Python 2 and 3
        state = {k: v for k, v in self.__dict__.items() if k in pickled}
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        # color is not part of the pickled state; restore the __init__ default
        # so the attribute exists on unpickled groups as well
        self.__dict__.setdefault('color', None)
        # self.recalc_info_dict()

    def __repr__(self):
        # return super(SampleGroup, self).__repr__()
        return "<RockPy.SampleGroup - << %s - %i samples >> >" % (self.name, len(self.sample_names))

    def __getitem__(self, item):
        """
        Look up a sample by name or by position in the sorted sample list.

        Raises
        ------
           KeyError
              if item is neither a known sample name nor a valid list index type
           IndexError
              if item is an integer outside the sample list
        """
        if item in self.sdict:
            return self.samples[item]
        try:
            return self.sample_list[item]
        except (KeyError, TypeError):
            # Indexing the list with an unknown *name* raises TypeError.
            # IndexError is deliberately not converted: it terminates
            # ``for sample in group`` iteration.
            raise KeyError('SampleGroup has no Sample << %s >>' % item)

    def import_multiple_samples(self, sample_file, length_unit='mm', mass_unit='mg', **options):
        """
        Imports a tab separated file with sample names, masses and dimensions
        and creates the sample objects.

        The first row is the header; the first column holds the sample name,
        all other columns are converted to float. Rows containing a cell that
        is exactly '#' and empty rows are skipped.

        Parameters
        ----------
           sample_file: str
              path of the file
           length_unit: str
              passed to Sample
           mass_unit: str
              passed to Sample
           options:
              currently unused
        """
        # context manager: the file handle is closed even if parsing fails
        with open(sample_file) as handle:
            rows = [row for row in csv.reader(handle, delimiter='\t')
                    if row and '#' not in row]

        if not rows:
            return

        header = rows[0]
        d_dict = {row[0]: {header[j].lower(): float(row[j]) for j in range(1, len(row))}
                  for row in rows[1:]}

        for sample in d_dict:
            values = d_dict[sample]
            S = Sample(sample,
                       mass=values.get('mass', None),
                       height=values.get('height', None),
                       diameter=values.get('diameter', None),
                       mass_unit=mass_unit, length_unit=length_unit)
            self.samples.update({sample: S})

    def pop_sample(self, sample_name):
        """
        remove samples from sample_group
        will take str(sample_name), list(sample_name)

        Unknown names are ignored. Returns the group itself.
        """
        if not isinstance(sample_name, list):
            sample_name = [sample_name]
        for sample in sample_name:
            if sample in self.samples:
                self.samples.pop(sample)
        return self

    # ### DATA properties
    @property
    def sample_list(self):
        return self.slist

    @property
    def sample_names(self):
        return sorted(self.samples.keys())

    def add_samples(self, s_list):
        """
        Adds samples to the sample dictionary, adds the group to
        sample.sgroups and registers the samples in the info dictionary.

        Parameters
        ----------
           s_list: single item or list
              single items get transformed to list

        Note
        ----
           Uses _to_list for list conversion
        """
        s_list = _to_list(s_list)
        self.samples.update(self._sdict_from_slist(s_list=s_list))
        self.log.info('ADDING sample(s) %s' % [s.name for s in s_list])
        for s in s_list:
            s.sgroups.append(self)
            self.add_s2_info_dict(s)

    def remove_samples(self, s_list):
        """
        Removes samples from the sgroup.samples dictionary and removes the
        sgroup from sample.sgroups

        Parameters
        ----------
           s_list: single item or list
              sample names; single items get transformed to list

        Note
        ----
           Uses _to_list for list conversion
        """
        s_list = _to_list(s_list)
        for s in s_list:
            self.sdict[s].sgroups.remove(self)
            self.samples.pop(s)

    # ## components of container
    # lists
    @property
    def slist(self):
        """ samples ordered by their key in self.samples """
        out = [self.samples[i] for i in sorted(self.samples.keys())]
        return out

    @property
    def sdict(self):
        return {s.name: s for s in self.sample_list}

    # The class used to define ``mtypes`` and ``stypes`` twice; only the
    # later definitions were ever effective and those are the ones kept.
    @property
    def mtypes(self):
        """
        looks through all samples and returns measurement types
        """
        return sorted(list(set([i.mtype for j in self.sample_list for i in j.measurements])))

    @property
    def stypes(self):
        """
        looks through all samples and returns series types
        """
        return sorted(list(set([t for sample in self.sample_list for t in sample.stypes])))

    @property
    def svals(self):
        out = []
        for sample in self.sample_list:
            out.extend(sample.svals)
        return self.__sort_list_set(out)

    # measurement: samples
    @property
    def mtype_sdict(self):
        out = {mtype: self.get_samples(mtypes=mtype) for mtype in self.mtypes}
        return out

    # mtype: stypes
    @property
    def mtype_stype_dict(self):
        """
        returns a dict {mtype: sorted list of series types} collected from
        ``sample.mtype_tdict`` of all samples with that measurement type
        """
        out = {}
        for mtype in self.mtypes:
            aux = []
            for s in self.get_samples(mtypes=mtype):
                for t in s.mtype_tdict[mtype]:
                    aux.append(t.stype)
            out[mtype] = self.__sort_list_set(aux)
        return out

    # mtype: svals
    @property
    def mtype_svals_dict(self):
        """
        returns a dict {mtype: sorted list of series values} collected from
        ``sample.mtype_tdict`` of all samples with that measurement type
        """
        out = {}
        for mtype in self.mtypes:
            aux = []
            for s in self.get_samples(mtypes=mtype):
                for t in s.mtype_tdict[mtype]:
                    aux.append(t.value)
            out[mtype] = self.__sort_list_set(aux)
        return out

    @property
    def stype_sval_dict(self):
        stype_sval_dict = {i: self._get_all_series_values(i) for i in self.stypes}
        return stype_sval_dict

    @property
    def mtype_dict(self):
        m_dict = {i: [m for s in self.sample_list for m in s.get_measurements(i)] for i in self.mtypes}
        return m_dict

    def _get_all_series_values(self, stype):
        """ sorted unique values of all series with the given stype """
        return sorted(list(set([n.value for j in self.sample_list for i in j.measurements for n in i.series
                                if n.stype == stype])))

    def stype_results(self, **parameter):
        """
        Splits the group results by series type and (rounded) series value.

        Results are calculated with ``calc_all(**parameter)`` if not done yet.

        Returns
        -------
           dict
              {stype: {series value rounded to 2 digits: filtered results}}
              for every result column whose name contains 'stype'
        """
        if self.results is None:
            self.results = self.calc_all(**parameter)

        stypes = [i for i in self.results.column_names if 'stype' in i]
        out = {i.split()[1]: {round(j, 2): None for j in self.results[i].v} for i in stypes}

        for stype in out:
            key = 'stype ' + stype
            values = self.results[key].v
            for sval in out[stype]:
                # The keys are rounded values, so the rows have to be selected
                # by their rounded value as well; comparing the raw values
                # would miss every row that was changed by the rounding.
                idx = np.array([n for n, v in enumerate(values) if round(v, 2) == sval], dtype=int)
                out[stype][sval] = self.results.filter_idx(idx)
        return out

    def _sdict_from_slist(self, s_list):
        """
        creates a dictionary with s.name:s for each sample in a list of samples

        Parameters
        ----------
           s_list: sample or list

        Returns
        -------
           dict
              dictionary with {sample.name : sample} for each sample in s_list

        Note
        ----
           uses _to_list for item -> list conversion
        """
        s_list = _to_list(s_list)
        out = {s.name: s for s in s_list}
        return out

    def calc_all(self, **parameter):
        """
        Calls ``calc_all(**parameter)`` on every sample and appends the sample
        results, labelled with the sample name, to ``self.results``.

        Returns
        -------
           self.results (unchanged, possibly None, if the group has no samples)
        """
        for sample in self.sample_list:
            label = sample.name
            sample.calc_all(**parameter)
            results = sample.results
            rpdata = RockPyData(column_names=results.column_names,
                                data=results.data,
                                row_names=[label for _ in results.data])
            if self.results is None:
                self.results = rpdata
            else:
                self.results = self.results.append_rows(rpdata)
        return self.results

    def average_results(self, **parameter):
        """
        makes averages of all calculations for all samples in group. Only
        samples with same series are averaged

        Parameters
        ----------
           parameter:
              calculation parameters; 'substfunc' (default 'mean') is removed
              and handed to ``condense``, the rest goes to ``stype_results``

        Returns
        -------
           dict or None
              {stype: condensed results, one row per series value};
              None if there are no series results
        """
        substfunc = parameter.pop('substfunc', 'mean')
        out = None
        stype_results = self.stype_results(**parameter)
        for stype in stype_results:
            for sval in sorted(stype_results[stype].keys()):
                aux = stype_results[stype][sval]
                aux.define_alias('variable', 'stype ' + stype)
                aux = condense(aux, substfunc=substfunc)
                if out is None:
                    out = {}
                # every series type starts its own table; the first value of
                # a second series type must not be appended to a missing key
                if stype not in out:
                    out[stype] = aux
                else:
                    out[stype] = out[stype].append_rows(aux)
        return out

    def __add__(self, other):
        """
        Returns a new SampleGroup with the samples of self (added through
        ``add_samples``) plus the entries of ``other.samples``.
        """
        self_copy = SampleGroup(sample_list=self.sample_list)
        self_copy.samples.update(other.samples)
        return self_copy

    def _mlist_to_tdict(self, mlist):
        """
        takes a list of measurements looks for common stypes

        Returns
        -------
           dict
              {stype: [measurements having that stype]}
        """
        # flatten first: the measurements' stypes collections themselves
        # (e.g. lists) cannot be members of a set
        stypes = sorted(set(stype for m in mlist for stype in m.stypes))
        return {stype: [m for m in mlist if stype in m.stypes] for stype in stypes}

    def get_measurements(self,
                         snames=None,
                         mtypes=None,
                         series=None,
                         stypes=None, svals=None, sval_range=None,
                         mean=False,
                         invert=False,
                         **options):
        """
        Wrapper, for finding measurements, calls get_samples first and
        sample.get_measurements

        Returns
        -------
           list
              measurements of all matching samples
        """
        samples = self.get_samples(snames, mtypes, stypes, svals, sval_range)
        out = []
        for sample in samples:
            try:
                out.extend(sample.get_measurements(mtypes=mtypes,
                                                   series=series,
                                                   stypes=stypes, svals=svals, sval_range=sval_range,
                                                   mean=mean,
                                                   invert=invert,
                                                   ))
            except TypeError:
                # best effort, as before: a sample whose lookup raises
                # TypeError contributes no measurements
                pass
        return out

    def delete_measurements(self, sname=None, mtype=None, stype=None, sval=None, sval_range=None):
        """
        deletes measurements according to criteria
        """
        # search for samples with measurement fitting criteria
        samples = self.get_samples(snames=sname, mtypes=mtype, stypes=stype, svals=sval,
                                   sval_range=sval_range)
        # individually delete measurements from samples
        for sample in samples:
            sample.remove_measurements(mtypes=mtype, stypes=stype, svals=sval, sval_range=sval_range)

    def get_samples(self, snames=None, mtypes=None, stypes=None, svals=None, sval_range=None):
        """
        Primary search function for all parameters

        Every sample is returned at most once, however many of the requested
        criteria it matches.

        Parameters
        ----------
           snames: list, str
              list of names or a single name of the sample to be retrieved
           mtypes: list, str
              keep samples having at least one of these measurement types
           stypes: list, str
              keep samples having at least one of these series types
           svals: list, float
              keep samples having one of these values in
              sample.stype_sval_dict[stype] for one of the given stypes
           sval_range: list, float
              keep samples having a series value within [min, max] of the
              range for one of the given stypes; a single number x means [0, x]

        Note
        ----
           svals and sval_range are looked up per series type and therefore
           need stypes to be given.

        Raises
        ------
           KeyError
              if a name is unknown or the mtypes, stypes or sval_range filter
              leaves no sample
        """
        t_value = np.nan if svals is None else svals

        if snames:
            snames = _to_list(snames)
            out = []
            for s in snames:
                try:
                    out.append(self.samples[s])
                except KeyError:
                    raise KeyError('RockPy.sample_group does not contain sample << %s >>' % s)
            if len(out) == 0:
                raise KeyError('RockPy.sample_group does not contain any samples')
        else:
            out = self.sample_list

        if mtypes:
            mtypes = _to_list(mtypes)
            out = [s for s in out if any(mtype in s.mtypes for mtype in mtypes)]
            if len(out) == 0:
                raise KeyError('RockPy.sample_group does not contain sample with mtypes: << %s >>' % mtypes)

        if stypes:
            stypes = _to_list(stypes)
            out = [s for s in out if any(stype in s.stypes for stype in stypes)]
            if len(out) == 0:
                raise KeyError('RockPy.sample_group does not contain sample with stypes: << %s >>' % stypes)

        if svals:
            svals = _to_list(svals)
            out = [s for s in out
                   if any(sval in s.stype_sval_dict[stype] for sval in svals for stype in stypes)]
            if len(out) == 0:
                self.log.error(
                    'RockPy.sample_group does not contain sample with (stypes, svals) pair: << %s, %s >>' % (
                        str(stypes), str(t_value)))
                return []

        if sval_range:
            if not isinstance(sval_range, list):
                sval_range = [0, sval_range]
            elif len(sval_range) == 1:
                sval_range = [0] + sval_range
            lower, upper = min(sval_range), max(sval_range)

            # the series type has to be bound before its values are iterated
            out = [s for s in out
                   if any(lower <= tv <= upper for stype in stypes for tv in s.stype_sval_dict[stype])]
            if len(out) == 0:
                raise KeyError(
                    'RockPy.sample_group does not contain sample with (stypes, sval_range) pair: << %s, %s >>' % (
                        stypes, sval_range))

        if len(out) == 0:
            # %s instead of %.2f: the value may be a list
            SampleGroup.log.error(
                'UNABLE to find sample with << %s, %s, %s, %s >>' % (snames, mtypes, stypes, t_value))

        return out

    def create_mean_sample(self,
                           reference=None,
                           ref_dtype='mag', vval=None,
                           norm_dtypes='all',
                           norm_method='max',
                           interpolate=True,
                           substfunc='mean',
                           ):
        """
        Creates a mean sample out of all samples

        All measurements of all samples are collected in a new Sample named
        'mean <group name>'. For every (mtype, stype, sval) combination,
        except the mtypes mass, diameter, height, volume, x_len, y_len and
        z_len, a mean measurement is calculated and appended to
        ``mean_measurements`` of the new sample. If reference or vval is
        given, the measurements are normalized first.

        :param reference: passed to measurement.normalize
        :param ref_dtype: passed to measurement.normalize
        :param vval: passed to measurement.normalize
        :param norm_dtypes: passed to measurement.normalize
        :param norm_method: passed to measurement.normalize
        :param interpolate: passed to sample.mean_measurement
        :param substfunc: passed to sample.mean_measurement
        :return: the mean sample, flagged with is_mean = True
        """
        skipped_mtypes = ['mass', 'diameter', 'height', 'volume', 'x_len', 'y_len', 'z_len']
        normalize = reference or vval

        # create new sample_obj
        mean_sample = Sample(name='mean ' + self.name)
        # get all measurements from all samples in sample group and add to mean sample
        mean_sample.measurements = [m for s in self.sample_list for m in s.measurements]
        mean_sample.populate_mdict()

        mdict = mean_sample.mdict['mtype_stype_sval']
        for mtype in sorted(mdict):
            if mtype in skipped_mtypes:
                continue
            for stype in sorted(mdict[mtype]):
                for sval in sorted(mdict[mtype][stype]):
                    series = None  # initialize

                    # normalize if needed
                    if normalize:
                        for m in mdict[mtype][stype][sval]:
                            m = m.normalize(reference=reference, ref_dtype=ref_dtype,
                                            norm_dtypes=norm_dtypes,
                                            vval=vval, norm_method=norm_method)
                            series = m.get_series(stypes=stype, svals=sval)[0]

                    # calculating the mean of all measurements
                    M = mean_sample.mean_measurement(mtype=mtype, stype=stype, sval=sval,
                                                     substfunc=substfunc,
                                                     interpolate=interpolate,
                                                     )
                    if series:
                        M.add_sval(series_obj=series)

                    if normalize:
                        M.is_normalized = True
                        M.norm = [reference, ref_dtype, vval, norm_method, np.nan]

                    mean_sample.mean_measurements.append(M)

        mean_sample.is_mean = True  # set is_mean flag after all measurements are created
        return mean_sample

    def __get_variable_list(self, rpdata_list):
        out = []
        for rp in rpdata_list:
            out.extend(rp['variable'].v)
        return self.__sort_list_set(out)

    def __sort_list_set(self, values):
        """
        returns a sorted list of non duplicate values
        :param values:
        :return:
        """
        return sorted(list(set(values)))

    # ### INFODICT

    def __create_info_dict(self):
        """
        creates all info dictionaries

        Returns
        -------
           dict
              one empty dictionary for every permutation of 'mtype', 'stype'
              and 'sval' (1 to 3 elements, joined with '_')
        """
        d = ['mtype', 'stype', 'sval']
        keys = ['_'.join(i) for n in range(1, len(d) + 1) for i in itertools.permutations(d, n)]
        out = {i: {} for i in keys}
        return out

    @staticmethod
    def _merge_info_level(target, source, depth, sample):
        """
        Copies the key structure of source into target, depth levels deep.
        The last level holds lists to which sample is added (once), all
        levels before hold dictionaries.
        """
        for entry in source:
            if depth == 1:
                members = target.setdefault(entry, list())
                if sample not in members:
                    members.append(sample)
            else:
                SampleGroup._merge_info_level(target.setdefault(entry, dict()),
                                              source[entry], depth - 1, sample)

    # @profile()
    def add_s2_info_dict(self, s):
        """
        Adds a sample to the infodict.

        For every key of the info dictionary the nested keys of
        ``s.info_dict[key]`` are mirrored; the number of levels equals the
        number of '_' separated parts of the key. The sample is stored in the
        list at the last level.

        Parameters
        ----------
           s: RockPySample
              The sample that should be added to the dictionary
        """
        for key, target in self.info_dict.items():
            depth = len(key.split('_'))
            self._merge_info_level(target, s.info_dict[key], depth, s)

    def recalc_info_dict(self):
        """
        Recalculates the info_dictionary with information of all samples and
        their corresponding measurements
        """
        self._info_dict = self.__create_info_dict()
        # explicit loop: a lazy map() would never call the method
        for s in self.slist:
            self.add_s2_info_dict(s)

    @property
    def info_dict(self):
        """
        Property for easy access of info_dict. If '_info_dict' has not been
        created (e.g. after unpickling), it will create one.
        """
        if not hasattr(self, '_info_dict'):
            # set before recalculating: add_s2_info_dict reads this property
            self._info_dict = self.__create_info_dict()
            self.recalc_info_dict()
        return self._info_dict
class SampleGroup(object):
    """
    Container for Samples, has special calculation methods

    Samples are stored in the ``samples`` dictionary. Combined calculation
    results of all samples are cached in ``results`` and a nested lookup
    structure (``info_dict``) maps mtype / stype / sval combinations to the
    samples providing them.
    """
    log = logging.getLogger(__name__)
    count = 0

    def __init__(self, name=None, sample_list=None, sample_file=None, **options):
        """
        Parameters
        ----------
           name: str
              name of the group, defaults to 'SampleGroup NNNN' using the instance counter
           sample_list: sample or list
              sample(s) that are added to the group
           sample_file: str
              path of a tab separated file, see import_multiple_samples
           options:
              passed on to import_multiple_samples
        """
        SampleGroup.count += 1

        SampleGroup.log.info('CRATING new << samplegroup >>')

        # ## initialize
        if name is None:
            name = 'SampleGroup %04i' % (self.count)

        self.name = name
        self.samples = {}
        self.results = None

        self.color = None

        if sample_file:
            self.import_multiple_samples(sample_file, **options)

        self._info_dict = self.__create_info_dict()

        if sample_list:
            self.add_samples(sample_list)

    def __getstate__(self):
        '''
        returned dict will be pickled, only name, samples and results are stored
        :return:
        '''
        pickled = ('name', 'samples', 'results')
        # items() instead of iteritems() keeps this working on python 2 and 3
        return {k: v for k, v in self.__dict__.items() if k in pickled}

    def __setstate__(self, state):
        # color is not part of the pickled state -> restore the default so that
        # the attribute exists on unpickled groups
        self.color = None
        self.__dict__.update(state)
        # _info_dict is not pickled either, the info_dict property rebuilds it on first access

    def __repr__(self):
        return "<RockPy.SampleGroup - << %s - %i samples >> >" % (self.name, len(self.sample_names))

    def __getitem__(self, item):
        """
        Returns a sample by name or by its position in the name-sorted sample_list

        Raises
        ------
           KeyError
              if item is neither a known sample name nor a valid list index type
           IndexError
              if an integer index is out of range (needed so that iterating over the group stops)
        """
        if item in self.sdict:
            return self.samples[item]
        try:
            return self.sample_list[item]
        except TypeError:
            # unknown names end up here: a list can not be indexed with e.g. a string
            raise KeyError('SampleGroup has no Sample << %s >>' % item)

    def import_multiple_samples(self, sample_file, length_unit='mm', mass_unit='mg', **options):
        """
        imports a tab separated file with sample_names, masses and dimensions and creates the sample_objects

        The first remaining row is the header. In all other rows the first column is the
        sample name, all further columns are converted to float and stored under the lower
        case header name. The columns 'mass', 'height' and 'diameter' are passed to Sample.

        Parameters
        ----------
           sample_file: str
              path of the file
           length_unit: str
              passed to Sample
           mass_unit: str
              passed to Sample
        """
        # the with block closes the file, even if parsing fails
        with open(sample_file) as sample_fh:
            # rows containing a cell that is exactly '#' are dropped, blank rows are skipped
            rows = [row for row in csv.reader(sample_fh, delimiter='\t') if row and '#' not in row]

        if not rows:
            return  # nothing to import

        header = rows[0]
        # later rows with the same sample name replace earlier ones
        d_dict = {row[0]: {header[j].lower(): float(row[j]) for j in range(1, len(row))}
                  for row in rows[1:]}

        for sample in d_dict:
            mass = d_dict[sample].get('mass', None)
            height = d_dict[sample].get('height', None)
            diameter = d_dict[sample].get('diameter', None)
            S = Sample(sample, mass=mass, height=height, diameter=diameter,
                       mass_unit=mass_unit, length_unit=length_unit)
            self.samples.update({sample: S})

    def pop_sample(self, sample_name):
        """
        remove samples from sample_group will take str(sample_name), list(sample_name)

        Names that are not part of the group are ignored.

        Returns
        -------
           SampleGroup
              the group itself
        """
        if not isinstance(sample_name, list):
            sample_name = [sample_name]
        for sample in sample_name:
            # NOTE(review): sample.sgroups and the info_dict are not updated here
            self.samples.pop(sample, None)
        return self

    # ### DATA properties
    @property
    def sample_list(self):
        """ list of all samples, sorted by their key in the samples dictionary """
        return self.slist

    @property
    def sample_names(self):
        """ sorted list of the keys of the samples dictionary """
        return sorted(self.samples.keys())

    def add_samples(self, s_list):
        """
        Adds a sample to the sample dictionary and adds the sample_group to sample.sample_groups

        Parameters
        ----------
           s_list: single item or list
              single items get transformed to list

        Note
        ----
           Uses _item_to_list for list conversion
        """
        s_list = _to_list(s_list)
        self.samples.update(self._sdict_from_slist(s_list=s_list))
        self.log.info('ADDING sample(s) %s' % [s.name for s in s_list])
        for s in s_list:
            # adding a sample twice must not register the group twice
            if self not in s.sgroups:
                s.sgroups.append(self)
            self.add_s2_info_dict(s)

    def remove_samples(self, s_list):
        """
        Removes a sample from the sgroup.samples dictionary and removes the sgroup from sample.sgroups

        Parameters
        ----------
           s_list: single item or list
              name(s) of the samples, single items get transformed to list

        Note
        ----
           Uses _item_to_list for list conversion
        """
        s_list = _to_list(s_list)
        for s in s_list:
            # NOTE(review): the info_dict still references removed samples until recalc_info_dict is called
            self.sdict[s].sgroups.remove(self)
            self.samples.pop(s)

    # ## components of container
    # lists
    @property
    def slist(self):
        """ list of all samples, sorted by their key in the samples dictionary """
        return [self.samples[key] for key in sorted(self.samples.keys())]

    @property
    def sdict(self):
        """ dictionary with sample.name: sample for all samples """
        return {s.name: s for s in self.sample_list}

    @property
    def mtypes(self):
        """
        looks through all samples and returns measurement types
        """
        return self.__sort_list_set([m.mtype for s in self.sample_list for m in s.measurements])

    @property
    def stypes(self):
        """
        looks through all samples and returns series types
        """
        return self.__sort_list_set([stype for sample in self.sample_list for stype in sample.stypes])

    @property
    def svals(self):
        """
        looks through all samples and returns series values
        """
        out = []
        for sample in self.sample_list:
            out.extend(sample.svals)
        return self.__sort_list_set(out)

    # measurement: samples
    @property
    def mtype_sdict(self):
        """ dictionary with mtype: list of samples having that measurement type """
        return {mtype: self.get_samples(mtypes=mtype) for mtype in self.mtypes}

    def _mtype_series_attribute(self, attribute):
        """ dictionary with mtype: sorted unique `attribute` of all entries in sample.mtype_tdict[mtype] """
        out = {}
        for mtype in self.mtypes:
            aux = [getattr(t, attribute)
                   for s in self.get_samples(mtypes=mtype)
                   for t in s.mtype_tdict[mtype]]
            out[mtype] = self.__sort_list_set(aux)
        return out

    # mtype: stypes
    @property
    def mtype_stype_dict(self):
        """
        returns a list of series types within a certain measurement type
        """
        return self._mtype_series_attribute('stype')

    # mtype: svals
    @property
    def mtype_svals_dict(self):
        """
        returns a list of series values within a certain measurement type
        """
        return self._mtype_series_attribute('value')

    @property
    def stype_sval_dict(self):
        """ dictionary with stype: sorted list of all values of that series type """
        return {stype: self._get_all_series_values(stype) for stype in self.stypes}

    @property
    def mtype_dict(self):
        """ dictionary with mtype: list of all measurements of that type from all samples """
        return {mtype: [m for s in self.sample_list for m in s.get_measurements(mtype)]
                for mtype in self.mtypes}

    def _get_all_series_values(self, stype):
        """ sorted unique values of all series with the given stype in all measurements of all samples """
        return sorted({series.value
                       for sample in self.sample_list
                       for measurement in sample.measurements
                       for series in measurement.series
                       if series.stype == stype})

    def stype_results(self, **parameter):
        """
        Splits the results of the group by series type and series value.

        If no results are stored, calc_all is called with the given parameters.

        Returns
        -------
           dict
              {stype: {sval: rows of self.results with that value}}, where sval is rounded to two decimals
        """
        # NOTE(review): relies on the truth value of self.results, calc_all tests for None instead
        if not self.results:
            self.results = self.calc_all(**parameter)

        stype_columns = [column for column in self.results.column_names if 'stype' in column]
        out = {column.split()[1]: {round(v, 2): None for v in self.results[column].v}
               for column in stype_columns}

        for stype in out:
            key = 'stype ' + stype
            # the dictionary keys are rounded, so the rows have to be matched on rounded values as well
            rounded = np.array([round(v, 2) for v in self.results[key].v])
            for sval in out[stype]:
                idx = np.where(rounded == sval)[0]
                out[stype][sval] = self.results.filter_idx(idx)
        return out

    def _sdict_from_slist(self, s_list):
        """
        creates a dictionary with s.name:s for each sample in a list of samples

        Parameters
        ----------
           s_list: sample or list

        Returns
        -------
           dict
              dictionary with {sample.name : sample} for each sample in s_list

        Note
        ----
           uses _to_list for item -> list conversion
        """
        s_list = _to_list(s_list)
        return {s.name: s for s in s_list}

    def calc_all(self, **parameter):
        """
        Calls calc_all for each sample and appends the sample results to the results of the group.

        Each appended row is labeled with the name of the sample.

        Note
        ----
           rows are appended to already existing results, they are not replaced
        """
        for sample in self.sample_list:
            sample.calc_all(**parameter)
            results = sample.results
            rpdata = RockPyData(column_names=results.column_names,
                                data=results.data,
                                row_names=[sample.name for _ in results.data])
            if self.results is None:
                self.results = rpdata
            else:
                self.results = self.results.append_rows(rpdata)
        return self.results

    def average_results(self, **parameter):
        """
        makes averages of all calculations for all samples in group. Only samples with same series are averaged

        Parameters
        ----------
           parameter:
              calculation parameters; 'substfunc' (default 'mean') is taken out and passed to condense

        Returns
        -------
           dict or None
              {stype: condensed results, one row block per sval}; None if there are no series results
        """
        substfunc = parameter.pop('substfunc', 'mean')
        out = {}
        stype_results = self.stype_results(**parameter)
        for stype in stype_results:
            for sval in sorted(stype_results[stype].keys()):
                aux = stype_results[stype][sval]
                aux.define_alias('variable', 'stype ' + stype)
                aux = condense(aux, substfunc=substfunc)
                # every stype gets its own entry, the first sval starts it
                if stype in out:
                    out[stype] = out[stype].append_rows(aux)
                else:
                    out[stype] = aux
        return out if out else None

    def __add__(self, other):
        """ returns a new SampleGroup with the samples of both groups """
        self_copy = SampleGroup(sample_list=self.sample_list)
        # NOTE(review): samples of other are not registered via add_samples (no sgroups / info_dict entry)
        self_copy.samples.update(other.samples)
        return self_copy

    def _mlist_to_tdict(self, mlist):
        """
        takes a list of measurements looks for common stypes

        Returns
        -------
           dict
              {stype: list of measurements having that stype}
        """
        # m.stypes is a collection of series types, flatten before building the set
        stypes = sorted({stype for m in mlist for stype in m.stypes})
        return {stype: [m for m in mlist if stype in m.stypes] for stype in stypes}

    def get_measurements(self,
                         snames=None,
                         mtypes=None,
                         series=None,
                         stypes=None, svals=None, sval_range=None,
                         mean=False,
                         invert=False,
                         **options):
        """
        Wrapper, for finding measurements, calls get_samples first and sample.get_measurements
        """
        samples = self.get_samples(snames, mtypes, stypes, svals, sval_range)
        out = []
        for sample in samples:
            try:
                out.extend(sample.get_measurements(mtypes=mtypes,
                                                   series=series,
                                                   stypes=stypes, svals=svals, sval_range=sval_range,
                                                   mean=mean,
                                                   invert=invert,
                                                   ))
            except TypeError:
                # best effort: samples for which the search raises a TypeError are skipped
                pass
        return out

    def delete_measurements(self, sname=None, mtype=None, stype=None, sval=None, sval_range=None):
        """
        deletes measurements according to criteria
        """
        # search for samples with measurement fitting criteria
        samples = self.get_samples(snames=sname, mtypes=mtype, stypes=stype, svals=sval, sval_range=sval_range)
        for sample in samples:
            # individually delete measurements from samples
            sample.remove_measurements(mtypes=mtype, stypes=stype, svals=sval, sval_range=sval_range)

    @staticmethod
    def _sample_svals(sample, stypes):
        """ all series values of a sample for the given stypes, for all of its stypes if none are given """
        sval_dict = sample.stype_sval_dict
        wanted = stypes if stypes else sample.stypes
        return [sval for stype in wanted if stype in sval_dict for sval in sval_dict[stype]]

    def get_samples(self, snames=None, mtypes=None, stypes=None, svals=None, sval_range=None):
        """
        Primary search function for all parameters

        The filters are applied one after the other, each sample is returned only once.

        Parameters
        ----------
           snames: list, str
              list of names or a single name of the sample to be retrieved
           mtypes: list, str
              samples need at least one of the measurement types
           stypes: list, str
              samples need at least one of the series types
           svals: list, float
              samples need at least one of the series values (within stypes, if given)
           sval_range: list, float
              samples need a series value within [min, max]; a single number x means [0, x]

        Returns
        -------
           list
              matching samples; empty (and an error is logged) if the group has no samples or
              no sample has one of the svals

        Raises
        ------
           KeyError
              if a name is unknown or the mtypes, stypes or sval_range filter leaves no sample
        """
        if snames:
            out = []
            for sname in _to_list(snames):
                try:
                    out.append(self.samples[sname])
                except KeyError:
                    raise KeyError('RockPy.sample_group does not contain sample << %s >>' % sname)
            if not out:
                raise KeyError('RockPy.sample_group does not contain any samples')
        else:
            out = self.sample_list

        if mtypes:
            mtypes = _to_list(mtypes)
            out = [s for s in out if any(mtype in s.mtypes for mtype in mtypes)]
            if not out:
                raise KeyError('RockPy.sample_group does not contain sample with mtypes: << %s >>' % mtypes)

        if stypes:
            stypes = _to_list(stypes)
            out = [s for s in out if any(stype in s.stypes for stype in stypes)]
            if not out:
                raise KeyError('RockPy.sample_group does not contain sample with stypes: << %s >>' % stypes)

        # a series value of 0 is a valid search value, only None means 'not given'
        svals = _to_list(svals) if svals is not None else []
        if svals:
            out = [s for s in out if any(sval in svals for sval in self._sample_svals(s, stypes))]
            if not out:
                self.log.error(
                    'RockPy.sample_group does not contain sample with (stypes, svals) pair: << %s, %s >>' % (
                        str(stypes), str(svals)))
                return []

        if sval_range:
            if not isinstance(sval_range, list):
                sval_range = [0, sval_range]
            elif len(sval_range) == 1:
                sval_range = [0] + sval_range
            lower, upper = min(sval_range), max(sval_range)
            out = [s for s in out if any(lower <= sval <= upper for sval in self._sample_svals(s, stypes))]
            if not out:
                raise KeyError(
                    'RockPy.sample_group does not contain sample with (stypes, sval_range) pair: << %s, %s >>' % (
                        stypes, sval_range))

        if not out:
            SampleGroup.log.error(
                'UNABLE to find sample with << %s, %s, %s, %s >>' % (snames, mtypes, stypes, svals))

        return out

    def create_mean_sample(self,
                           reference=None,
                           ref_dtype='mag', vval=None,
                           norm_dtypes='all',
                           norm_method='max',
                           interpolate=True,
                           substfunc='mean',
                           ):
        """
        Creates a mean sample out of all samples

        All measurements of all samples are collected in a new sample. For each combination
        of mtype, stype and sval one mean measurement is calculated. If reference or vval is
        given, the measurements are normalized first.

        :param reference: passed to measurement.normalize
        :param ref_dtype: passed to measurement.normalize
        :param vval: passed to measurement.normalize
        :param norm_dtypes: passed to measurement.normalize
        :param norm_method: passed to measurement.normalize
        :param interpolate: passed to mean_measurement
        :param substfunc: passed to mean_measurement
        :return: the mean sample
        """
        # parameter measurements, no mean measurement is calculated for them
        parameter_mtypes = ['mass', 'diameter', 'height', 'volume', 'x_len', 'y_len', 'z_len']
        normalize = reference or vval

        # create new sample_obj
        mean_sample = Sample(name='mean ' + self.name)
        # get all measurements from all samples in sample group and add to mean sample
        mean_sample.measurements = [m for s in self.sample_list for m in s.measurements]
        mean_sample.populate_mdict()

        lookup = mean_sample.mdict['mtype_stype_sval']
        for mtype in sorted(lookup):
            if mtype in parameter_mtypes:
                continue
            for stype in sorted(lookup[mtype]):
                for sval in sorted(lookup[mtype][stype]):
                    series = None  # stays None if no normalization is requested

                    # normalize if needed
                    if normalize:
                        for m in lookup[mtype][stype][sval]:
                            m = m.normalize(reference=reference, ref_dtype=ref_dtype,
                                            norm_dtypes=norm_dtypes,
                                            vval=vval, norm_method=norm_method)
                            # the series of the last normalized measurement is kept
                            series = m.get_series(stypes=stype, svals=sval)[0]

                    # calculating the mean of all measurements
                    # normalization parameters are not passed, the measurements are normalized above
                    M = mean_sample.mean_measurement(mtype=mtype, stype=stype, sval=sval,
                                                     substfunc=substfunc,
                                                     interpolate=interpolate,
                                                     )
                    if series:
                        M.add_sval(series_obj=series)

                    if normalize:
                        M.is_normalized = True
                        M.norm = [reference, ref_dtype, vval, norm_method, np.nan]

                    mean_sample.mean_measurements.append(M)

        mean_sample.is_mean = True  # set is_mean flag after all measurements are created
        return mean_sample

    def __get_variable_list(self, rpdata_list):
        """ sorted unique values of the 'variable' column of all given data objects """
        out = []
        for rp in rpdata_list:
            out.extend(rp['variable'].v)
        return self.__sort_list_set(out)

    def __sort_list_set(self, values):
        """
        returns a sorted list of non duplicate values
        :param values:
        :return:
        """
        return sorted(set(values))

    ''' INFODICT '''

    def __create_info_dict(self):
        """
        creates all info dictionaries

        Returns
        -------
           dict
              Dictionary with an empty dictionary for each permutation of mtype, stype and sval,
              e.g. 'mtype', 'stype_sval', 'sval_mtype_stype'.
        """
        d = ['mtype', 'stype', 'sval']
        keys = ['_'.join(levels)
                for n in range(1, len(d) + 1)
                for levels in itertools.permutations(d, n)]
        return {key: {} for key in keys}

    def _merge_info_level(self, target, source, depth, s):
        """ mirrors the nested dictionary source into target, the last level holds lists of samples """
        for entry in source:
            if depth == 0:
                # last level -> list of samples, add sample if not already in list
                members = target.setdefault(entry, list())
                if s not in members:
                    members.append(s)
            else:
                # not the last level -> go one dictionary deeper
                self._merge_info_level(target.setdefault(entry, dict()), source[entry], depth - 1, s)

    # @profile()
    def add_s2_info_dict(self, s):
        """
        Adds a sample to the infodict.

        For each key of the info_dict, the nested structure of s.info_dict[key] is copied;
        the innermost level holds the list of samples. A sample is never listed twice.

        Parameters
        ----------
           s: RockPySample
              The sample that should be added to the dictionary
        """
        for key in self.info_dict:
            # 'mtype_stype_sval' has three levels: two levels of dictionaries above the lists of samples
            depth = len(key.split('_')) - 1
            self._merge_info_level(self._info_dict[key], s.info_dict[key], depth, s)

    def recalc_info_dict(self):
        """
        Recalculates the info_dictionary with information of all samples and their corresponding measurements

        """
        self._info_dict = self.__create_info_dict()
        # explicit loop: map() is lazy on python 3 and would not add anything
        for s in self.slist:
            self.add_s2_info_dict(s)

    @property
    def info_dict(self):
        """
        Property for easy access of info_dict. If '_info_dict' has not been created, it will create one.
        """
        if not hasattr(self, '_info_dict'):
            self.recalc_info_dict()
        return self._info_dict