Python RockPyData.append_rows Exemples, RockPy.Structure.data.RockPyData.append_rows Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : anisotropy.py Projet : RiccardaNaeve/RockPy

    def simulate(cls, sample_obj, color=None, **parameter):
        """
        Build a simulated anisotropy measurement for ``sample_obj``.

        Recognised entries of ``parameter``:

        mdirs : list of [D, I] pairs, measurement directions
            (default: [[0, 0], [90, 0], [0, 90]])
        evals : three eigenvalues of the diagonal tensor (default: 1, 1, 1)
        measerr : scale of the random error added to every component
            (default: 0)

        Raises RuntimeError when ``evals`` does not hold exactly 3 values.
        All of ``parameter`` is also forwarded to the constructor.
        """
        # measurement directions as D,I pairs
        directions = parameter.get('mdirs', [[0.0, 0.0], [90.0, 0.0], [0.0, 90.0]])

        # eigenvalues of the tensor
        eigenvalues = list(parameter.get('evals', [1.0, 1.0, 1.0]))
        n_eigenvalues = len(eigenvalues)
        if n_eigenvalues != 3:
            raise RuntimeError('got %d eigenvalues instead of 3' % n_eigenvalues)

        # scale of the random measurement errors
        measerr = parameter.get('measerr', 0)

        # todo: normalize evals to 1?
        tensor = Anisotropy.createDiagonalTensor(*eigenvalues)

        # todo: also implement 1D measurement
        data = RockPyData(column_names=['d', 'i', 'x', 'y', 'z'])

        for direction in directions:
            # one error per component; presumably random() is uniform on
            # [0, 1), which would put each error in [-measerr, measerr)
            noise = []
            for _ in range(3):
                noise.append(measerr * random() * 2 - measerr)

            # M = R * H
            field = DIL2XYZ((direction[0], direction[1], 1))
            response = np.dot(tensor, field) + noise

            row = np.hstack([np.array(direction), response])
            data = data.append_rows(row)

        data.define_alias('variable', ('d', 'i'))

        return cls(sample_obj, 'anisotropy',
                   mfile=None,
                   mdata={'data': data},
                   machine='simulation',
                   color=color,
                   **parameter)

Exemple #2

0

Afficher le fichier

Fichier : sample.py Projet : RiccardaNaeve/RockPy

    def calc_all_mean_results(self, filtered=False, **parameter):
        """
        Calculates the mean out of all results

        For every (mtype, stype, sval) combination known to this object the
        results returned by ``self.all_results`` are averaged column-wise
        (``np.mean``); the column-wise ``np.std`` is stored as the error.
        One row per combination is collected.

        Parameters
        ----------
           filtered:
              passed through to ``self.all_results``
           parameter:
              passed through to ``self.all_results``

        Returns
        -------
           RockPyData or None
              the collected rows; None if there was no combination at all.
              The same object is stored in ``self._mean_results``.
        """
        out = None
        for mtype in self.mtypes:
            for stype in self.mtype_stype_dict[mtype]:
                for sval in self.stype_sval_dict[stype]:
                    results = self.all_results(mtype=mtype, stype=stype, sval=sval,
                                               filtered=filtered,
                                               **parameter)

                    results.define_alias('variable', ['stype ' + stype])

                    data = np.mean(results.v, axis=0)
                    err = np.std(results.v, axis=0)

                    row = RockPyData(column_names=results.column_names, data=data)
                    row.e = err.reshape(1, len(err))

                    # compare against None explicitly: the truth value of a
                    # data object is not a reliable "nothing collected yet" test
                    if out is None:
                        out = row
                    else:
                        out = out.append_rows(data=row.data)
        self._mean_results = out
        return out

Exemple #3

0

Afficher le fichier

Fichier : test_rockPyData.py Projet : yinyongqi/RockPy

    def test_add_errors(self):
        """Errors assigned through ``.e`` can be read back per column."""
        # Values cannot be assigned column by column on an object without
        # rows (AttributeError on NoneType; maybe initialize to np.nan?),
        # so a row is appended first.
        empty = RockPyData(column_names=['A', 'B'])
        filled = empty.append_rows([1, 2])
        filled.e = [[4, 5]]

        error_of_b = filled['B'].e
        self.assertEqual(5., error_of_b)

Exemple #4

0

Afficher le fichier

Fichier : anisotropy.py Projet : RiccardaNaeve/RockPy

    def format_ani(self):
        """
        Convert the machine data into the ``data`` entry of ``self._data``.

        Reads ``header``, ``mdirs`` and ``data`` from ``self.machine_data``.
        Each measurement direction becomes one row: columns d, i, m when there
        is one value per direction (scalar), columns d, i, x, y, z when there
        are three values per direction (vectorial). Any other number of values
        is logged as an error and nothing is stored.
        """
        self.header = self.machine_data.header

        mdirs = self.machine_data.mdirs
        measurements = self.machine_data.data

        n_dirs = len(mdirs)
        n_values = len(measurements.flatten())

        # do we have scalar or vectorial measurements?
        # The counts are compared by multiplication: a division would floor on
        # Python 2 (10 / 3 == 3) and fail for an empty list of directions.
        if n_values == n_dirs:  # scalar
            data = RockPyData(column_names=['d', 'i', 'm'])
        elif n_values == 3 * n_dirs:  # vectorial
            data = RockPyData(column_names=['d', 'i', 'x', 'y', 'z'])
        else:
            Anisotropy.logger.error(
                "anisotropy measurements have %d values for %d directions "
                "(expected 1 or 3 components per direction)" % (n_values, n_dirs))
            return

        for idx, mdir in enumerate(mdirs):
            data = data.append_rows(np.hstack([np.array(mdir), measurements[idx]]))

        data.define_alias('variable', ('d', 'i'))
        self._data['data'] = data

Exemple #5

0

Afficher le fichier

Fichier : anisotropy.py Projet : yinyongqi/RockPy

    def format_ani(self):
        """
        Convert the machine data into the ``data`` entry of ``self._data``.

        Reads ``header``, ``mdirs`` and ``data`` from ``self.machine_data``.
        Each measurement direction becomes one row: columns d, i, m when there
        is one value per direction (scalar), columns d, i, x, y, z when there
        are three values per direction (vectorial). Any other number of values
        is logged as an error and nothing is stored.
        """
        self.header = self.machine_data.header

        mdirs = self.machine_data.mdirs
        measurements = self.machine_data.data

        n_dirs = len(mdirs)
        n_values = len(measurements.flatten())

        # do we have scalar or vectorial measurements?
        # The counts are compared by multiplication: a division would floor on
        # Python 2 (10 / 3 == 3) and fail for an empty list of directions.
        if n_values == n_dirs:  # scalar
            data = RockPyData(column_names=['d', 'i', 'm'])
        elif n_values == 3 * n_dirs:  # vectorial
            data = RockPyData(column_names=['d', 'i', 'x', 'y', 'z'])
        else:
            Anisotropy.logger.error(
                "anisotropy measurements have %d values for %d directions "
                "(expected 1 or 3 components per direction)" % (n_values, n_dirs))
            return

        for idx, mdir in enumerate(mdirs):
            data = data.append_rows(
                np.hstack([np.array(mdir), measurements[idx]]))

        data.define_alias('variable', ('d', 'i'))
        self._data['data'] = data

Exemple #6

0

Afficher le fichier

Fichier : anisotropy.py Projet : yinyongqi/RockPy

    def simulate(cls, sample_obj, color=None, **parameter):
        """
        Build a simulated anisotropy measurement for ``sample_obj``.

        Recognised entries of ``parameter``:

        mdirs : list of [D, I] pairs, measurement directions
            (default: [[0, 0], [90, 0], [0, 90]])
        evals : three eigenvalues of the diagonal tensor (default: 1, 1, 1)
        measerr : scale of the random error added to every component
            (default: 0)

        Raises RuntimeError when ``evals`` does not hold exactly 3 values.
        All of ``parameter`` is also forwarded to the constructor.
        """
        # measurement directions as D,I pairs
        directions = parameter.get('mdirs', [[0.0, 0.0], [90.0, 0.0], [0.0, 90.0]])

        # eigenvalues of the tensor
        eigenvalues = list(parameter.get('evals', [1.0, 1.0, 1.0]))
        n_eigenvalues = len(eigenvalues)
        if n_eigenvalues != 3:
            raise RuntimeError('got %d eigenvalues instead of 3' % n_eigenvalues)

        # scale of the random measurement errors
        measerr = parameter.get('measerr', 0)

        # todo: normalize evals to 1?
        tensor = Anisotropy.createDiagonalTensor(*eigenvalues)

        # todo: also implement 1D measurement
        data = RockPyData(column_names=['d', 'i', 'x', 'y', 'z'])

        for direction in directions:
            # one error per component; presumably random() is uniform on
            # [0, 1), which would put each error in [-measerr, measerr)
            noise = []
            for _ in range(3):
                noise.append(measerr * random() * 2 - measerr)

            # M = R * H
            field = DIL2XYZ((direction[0], direction[1], 1))
            response = np.dot(tensor, field) + noise

            row = np.hstack([np.array(direction), response])
            data = data.append_rows(row)

        data.define_alias('variable', ('d', 'i'))

        simulated = {'data': data}
        return cls(sample_obj, 'anisotropy', mfile=None, mdata=simulated,
                   machine='simulation', color=color, **parameter)

Exemple #7

0

Afficher le fichier

Fichier : test_rockPyData.py Projet : yinyongqi/RockPy

class TestRockPyData(TestCase):
    """Unit tests for RockPyData, run against a small 4x4 fixture."""

    def setUp(self):
        # run before each test
        self.testdata = ((1, 2, 3, 4),
                         (1, 6, 7, 8),
                         (1, 2, 11, 12),
                         (1, 6, 55, 66))

        self.col_names = ('F', 'Mx', 'My', 'Mz')
        self.row_names = ('1.Zeile', '2.Zeile_A', '3.Zeile', '4.Zeile_A')
        self.units = ('T', 'mT', 'fT', 'pT')

        self.RPD = RockPyData(column_names=self.col_names, row_names=self.row_names, units=self.units,
                              data=self.testdata)

    def test_column_names(self):
        """column_names returns the names as a list."""
        self.assertEqual(self.RPD.column_names, list(self.col_names))

    def test_column_count(self):
        """column_count matches the number of column names."""
        self.assertEqual(self.RPD.column_count, len(self.col_names))

    def test__find_duplicate_variable_rows(self):
        """Rows sharing the same 'variable' values are grouped together."""
        # self.assertTrue((self.RPD._find_duplicate_variables()[0] == np.array([0, 1, 2])).all())
        self.assertEqual(self.RPD._find_duplicate_variable_rows(), [(0, 1, 2, 3)])

        # redefine variable alias to the first two columns
        self.RPD.define_alias('variable', ('F', 'Mx'))
        self.assertEqual(self.RPD._find_duplicate_variable_rows(), [(0, 2), (1, 3)])

    def test_rename_column(self):
        """rename_column replaces exactly the given column name."""
        self.RPD.rename_column('Mx', 'M_x')
        self.assertEqual(self.RPD.column_names, ['F', 'M_x', 'My', 'Mz'])

    def test_append_rows(self):
        """append_rows accepts several rows, a single row and another RockPyData."""
        d1 = [[5, 6, 7, 8], [9, 10, 11, 12]]
        self.RPD = self.RPD.append_rows(d1, ('5.Zeile', '6.Zeile'))
        self.assertTrue(np.array_equal(self.RPD.v[-2:, :], np.array(d1)))
        d2 = [5, 6, 7, 8]
        self.RPD = self.RPD.append_rows(d2, '5.Zeile')
        self.assertTrue(np.array_equal(self.RPD.v[-1, :], np.array(d2)))
        # lets try with other RockPyData object
        rpd = copy.deepcopy(self.RPD)
        rpd.rename_column('Mx', 'M_x')
        self.RPD = self.RPD.append_rows(rpd)
        # TODO: add assert

    def test_delete_rows(self):
        """delete_rows removes the rows with the given indices."""
        self.RPD = self.RPD.delete_rows((0, 2))
        self.assertTrue(np.array_equal(self.RPD.v, np.array(self.testdata)[(1, 3), :]))

    def test_eliminate_duplicate_variable_rows(self):
        """With the fixture's default variable all four rows are duplicates."""
        # check for one variable column
        self.RPD = self.RPD.eliminate_duplicate_variable_rows()
        self.assertTrue(np.array_equal(self.RPD.v, np.array([]).reshape(0, 4)))

    def test_eliminate_duplicate_variable_rows2(self):
        """Duplicates can be substituted by their mean or by the last row."""
        # check for two variable columns
        self.RPD.define_alias('variable', ('F', 'Mx'))
        rpd = self.RPD.eliminate_duplicate_variable_rows(substfunc='mean')
        self.assertTrue(np.array_equal(rpd.v, np.array([[1., 2., 7., 8.], [1., 6., 31., 37.]])))
        self.assertTrue(np.array_equal(rpd.e, np.array([[0., 0., 4., 4.], [0., 0., 24., 29.]])))
        rpd = self.RPD.eliminate_duplicate_variable_rows(substfunc='last')
        self.assertTrue(np.array_equal(rpd.v, np.array([[1., 2., 11., 12.], [1., 6., 55., 66.]])))

    def test_mean(self):
        """mean() collapses all rows into one row of column means with errors."""
        self.RPD = self.RPD.mean()
        self.assertTrue(np.array_equal(self.RPD.v, np.array([[1., 4., 19., 22.5]])))
        np.testing.assert_allclose(self.RPD.e, np.array([[0., 2., 20.976, 25.273]]), atol=0.01)

    def test_max(self):
        """max() collapses all rows into one row of column maxima."""
        self.RPD = self.RPD.max()
        self.assertTrue(np.array_equal(self.RPD.v, np.array([[1., 6., 55., 66.]])))

    def test_filter_row_names(self):
        """filter_row_names keeps only the rows with the given names."""
        self.assertEqual(self.RPD.filter_row_names(('1.Zeile', '3.Zeile')).row_names, ['1.Zeile', '3.Zeile'])

    def test_filter_match_row_names(self):
        """filter_match_row_names keeps the rows whose names match the pattern."""
        # get all rows ending with '_A'
        self.assertEqual(self.RPD.filter_match_row_names('.*_A').row_names, ['2.Zeile_A', '4.Zeile_A'])

    def test_append_columns(self):
        """append_columns adds one column holding the given values."""
        cb = self.RPD.column_count
        d = (8, 7, 6, 5)
        self.RPD = self.RPD.append_columns('neue Spalte', d)
        self.assertEqual(cb + 1, self.RPD.column_count)
        self.assertTrue(np.array_equal(self.RPD['neue Spalte'].v, np.array(d)))

    def test_sort(self):
        """sort orders the rows by the given column."""
        self.assertTrue(np.array_equal(self.RPD.sort('Mx')['Mx'].v, np.array((2, 2, 6, 6))))

    # disabled test, kept for reference
    #def test_interpolate(self):
    #    self.RPD.define_alias('variable', 'My')
    #    iv = (1, 11, 33, 55, 100)
    #    self.assertTrue(np.array_equal((self.RPD.interpolate(iv))['My'].v, np.array(iv)))
    #    self.assertTrue(np.array_equal((self.RPD.interpolate(iv))['Mx'].v[1:-1], np.array([2., 4., 6.])))

    def test_magnitude(self):
        """magnitude of the ('Mx', 'My', 'Mz') alias is the euclidean norm per row."""
        self.RPD.define_alias('m', ('Mx', 'My', 'Mz'))
        self.RPD = self.RPD.append_columns('mag', self.RPD.magnitude('m'))
        np.testing.assert_allclose(self.RPD['mag'].v, np.array([5.38516481, 12.20655562, 16.40121947, 86.12200648]), atol=1e-5)

    def test_column_names_to_indices(self):
        """column_names_to_indices maps names to their positions."""
        self.assertEqual(self.RPD.column_names_to_indices(('Mx', 'Mz')), [1, 3])

    def test_interation(self):
        """Iterating over the object must not raise."""
        # TODO: add proper assertion
        for l in self.RPD:
            pass

    def test_add_errors(self):
        """Errors assigned through ``.e`` can be read back per column."""
        d = RockPyData(column_names=['A', 'B'])
        # Assigning d['A'].v = 1 here raised an AttributeError (NoneType has
        # no attribute), maybe initialize to np.nan? A row is appended instead.
        d = d.append_rows([1, 2])
        d.e = [[4, 5]]

        self.assertEqual(5., d['B'].e)

    def test_data_assignment(self):
        """Smoke test for column assignment; only prints, asserts nothing yet."""
        # print(x) with a single argument behaves the same on Python 2 and 3,
        # the former print statement did not compile on Python 3
        print(self.RPD)
        # set only values
        self.RPD['Mx'] = [1.1, 1.2, 1.3, 1.4]
        print(self.RPD)
        # set values and errors
        self.RPD['Mx'] = [[[1.1, 0.11]], [[1.2, 0.12]], [[1.3, 0.13]], [[1.4, 0.14]]]
        print(self.RPD)

Exemple #8

0

Afficher le fichier

Fichier : samplegroup.py Projet : RiccardaNaeve/RockPy

class SampleGroup(object):
    """
    Container for Samples, has special calculation methods

    The samples are kept in ``self.samples``, a dictionary sample name ->
    sample. ``self.results`` caches the combined results of all samples (see
    calc_all) and ``self._info_dict`` holds lookup dictionaries built from the
    ``info_dict`` of every sample (see add_s2_info_dict).
    """
    log = logging.getLogger(__name__)

    # number of groups created so far, used for the default group name
    count = 0

    def __init__(self, name=None, sample_list=None, sample_file=None, **options):
        """
        Parameters
        ----------
           name: str
              name of the group, defaults to 'SampleGroup NNNN' (running count)
           sample_list: sample or list of samples
              added to the group through add_samples
           sample_file: str
              tab separated sample file, see import_multiple_samples
           options:
              passed on to import_multiple_samples
        """
        SampleGroup.count += 1

        SampleGroup.log.info('CRATING new << samplegroup >>')

        # ## initialize
        if name is None:
            name = 'SampleGroup %04i' % (self.count)

        self.name = name
        self.samples = {}
        self.results = None

        self.color = None

        if sample_file:
            self.import_multiple_samples(sample_file, **options)

        self._info_dict = self.__create_info_dict()

        if sample_list:
            self.add_samples(sample_list)

    def __getstate__(self):
        '''
        returned dict will be pickled

        Only name, samples and results are kept.
        :return: dict
        '''
        pickled = ('name', 'samples', 'results')
        # items() instead of iteritems(): works on Python 2 and Python 3
        return {k: v for k, v in self.__dict__.items() if k in pickled}

    def __setstate__(self, state):
        self.__dict__.update(state)
        # 'color' is not part of the pickled state, restore the default of
        # __init__ so that the attribute exists after unpickling
        self.__dict__.setdefault('color', None)
        # '_info_dict' is not restored here, the info_dict property rebuilds
        # it on first access

    def __repr__(self):
        # return super(SampleGroup, self).__repr__()
        return "<RockPy.SampleGroup - << %s - %i samples >> >" % (self.name, len(self.sample_names))

    def __getitem__(self, item):
        """
        Return a sample by name or by position in the sorted sample list.

        Raises
        ------
           KeyError
              if ``item`` is neither a sample name nor usable as list index
           IndexError
              if ``item`` is an integer outside of the sample list. This
              is deliberately not converted: iterating over the group relies
              on it to stop.
        """
        sdict = self.sdict
        if item in sdict:
            return sdict[item]
        try:
            return self.sample_list[item]
        except (KeyError, TypeError):
            # a list raises TypeError (never KeyError) for a non-integer index
            raise KeyError('SampleGroup has no Sample << %s >>' % (item,))

    def import_multiple_samples(self, sample_file, length_unit='mm', mass_unit='mg', **options):
        """
        imports a csv file with sample_names masses and dimensions and creates the sample_objects

        The file is tab separated. The first row is the header, the first
        column holds the sample name and all other cells must be numbers.
        Rows containing a cell that is exactly '#' and empty rows are skipped.

        :param sample_file: path of the file
        :param length_unit: passed to Sample
        :param mass_unit: passed to Sample
        :param options: not used
        :return: None
        """
        # the with block closes the file again, also if reading fails
        with open(sample_file) as handle:
            rows = [row for row in csv.reader(handle, delimiter='\t') if row and '#' not in row]
        if not rows:
            # nothing to import, not even a header
            return

        header = rows[0]
        d_dict = {row[0]: {header[j].lower(): float(row[j]) for j in range(1, len(row))} for row in rows[1:]}
        for sample in d_dict:
            mass = d_dict[sample].get('mass', None)
            height = d_dict[sample].get('height', None)
            diameter = d_dict[sample].get('diameter', None)
            S = Sample(sample, mass=mass, height=height, diameter=diameter, mass_unit=mass_unit,
                       length_unit=length_unit)
            self.samples[sample] = S

    def pop_sample(self, sample_name):
        """
        remove samples from sample_group will take str(sample_name), list(sample_name)

        Unknown names are ignored. Returns the group itself.
        """
        if not isinstance(sample_name, list):
            sample_name = [sample_name]
        for sample in sample_name:
            self.samples.pop(sample, None)
        return self

    # ### DATA properties
    @property
    def sample_list(self):
        """list of all samples, sorted by their key in self.samples"""
        return self.slist

    @property
    def sample_names(self):
        """sorted list of the keys of self.samples"""
        return sorted(self.samples.keys())

    def add_samples(self, s_list):
        """
        Adds a sample to the sample dictionary and adds the sample_group to sample.sample_groups

        Parameters
        ----------
           s_list: single item or list
              single items get transformed to list

        Note
        ----
           Uses _item_to_list for list conversion
        """

        s_list = _to_list(s_list)
        self.samples.update(self._sdict_from_slist(s_list=s_list))
        self.log.info('ADDING sample(s) %s' % [s.name for s in s_list])
        for s in s_list:
            s.sgroups.append(self)
            self.add_s2_info_dict(s)

    def remove_samples(self, s_list):
        """
        Removes a sample from the sgroup.samples dictionary and removes the sgroup from sample.sgroups

        Parameters
        ----------
           s_list: single item or list
              sample name(s), the items are used as keys of sdict and samples;
              single items get transformed to list

        Note
        ----
           Uses _item_to_list for list conversion
        """

        s_list = _to_list(s_list)
        for s in s_list:
            self.sdict[s].sgroups.remove(self)
            self.samples.pop(s)

    # ## components of container
    # lists
    @property
    def slist(self):
        """list of all samples, sorted by their key in self.samples"""
        out = [self.samples[i] for i in sorted(self.samples.keys())]
        return out

    @property
    def sdict(self):
        """dictionary sample.name -> sample for all samples"""
        return {s.name: s for s in self.sample_list}

    @property
    def svals(self):
        """sorted list of the distinct svals of all samples"""
        out = []
        for sample in self.sample_list:
            out.extend(sample.svals)
        return self.__sort_list_set(out)

    # measurement: samples
    @property
    def mtype_sdict(self):
        """dictionary mtype -> list of samples having this mtype"""
        out = {mtype: self.get_samples(mtypes=mtype) for mtype in self.mtypes}
        return out

    # mtype: stypes
    @property
    def mtype_stype_dict(self):
        """
        returns a dictionary mtype -> sorted list of the series types (stype)
        found in s.mtype_tdict[mtype] of the samples with that mtype
        """
        out = {}
        for mtype in self.mtypes:
            aux = []
            for s in self.get_samples(mtypes=mtype):
                for t in s.mtype_tdict[mtype]:
                    aux.append(t.stype)
            out[mtype] = self.__sort_list_set(aux)
        return out

    # mtype: svals
    @property
    def mtype_svals_dict(self):
        """
        returns a dictionary mtype -> sorted list of the series values
        found in s.mtype_tdict[mtype] of the samples with that mtype
        """
        out = {}
        for mtype in self.mtypes:
            aux = []
            for s in self.get_samples(mtypes=mtype):
                for t in s.mtype_tdict[mtype]:
                    aux.append(t.value)
            out[mtype] = self.__sort_list_set(aux)
        return out

    @property
    def stype_sval_dict(self):
        """dictionary stype -> sorted list of all values of that series type"""
        stype_sval_dict = {i: self._get_all_series_values(i) for i in self.stypes}
        return stype_sval_dict

    @property
    def mtype_dict(self):
        """dictionary mtype -> list of the measurements of all samples"""
        m_dict = {i: [m for s in self.sample_list for m in s.get_measurements(i)] for i in self.mtypes}
        return m_dict

    def _get_all_series_values(self, stype):
        """sorted list of the distinct values of all series with the given stype"""
        return sorted(list(set([n.value for j in self.sample_list for i in j.measurements for n in i.series
                                if n.stype == stype])))

    # NOTE: earlier, shadowed definitions of the mtypes and stypes properties
    # were removed; the two below were already the effective ones.
    @property
    def mtypes(self):
        """
        looks through all samples and returns measurement types
        """
        return sorted(list(set([i.mtype for j in self.sample_list for i in j.measurements])))

    @property
    def stypes(self):
        """
        looks through all samples and returns series types
        """
        return sorted(list(set([t for sample in self.sample_list for t in sample.stypes])))

    def stype_results(self, **parameter):
        """
        Sorts the results of the group by series type and series value.

        The results are calculated with calc_all(**parameter) if there are
        none yet. Series columns are the result columns with 'stype' in
        their name.

        Returns
        -------
           dict
              {stype: {sval: rows of self.results with this value}}, the sval
              keys are rounded to 2 decimals
        """
        # same "not calculated yet" test as in calc_all
        if self.results is None:
            self.results = self.calc_all(**parameter)
        stypes = [i for i in self.results.column_names if 'stype' in i]
        out = {i.split()[1]: {round(j, 2): None for j in self.results[i].v} for i in stypes}

        for stype in out:
            for sval in out[stype]:
                key = 'stype ' + stype
                idx = np.where(self.results[key].v == sval)[0]
                out[stype][sval] = self.results.filter_idx(idx)
        return out

    def _sdict_from_slist(self, s_list):
        """
        creates a dictionary with s.name:s for each sample in a list of samples

        Parameters
        ----------
           s_list: sample or list
        Returns
        -------
           dict
              dictionary with {sample.name : sample} for each sample in s_list

        Note
        ----
           uses _to_list for item -> list conversion
        """
        s_list = _to_list(s_list)

        out = {s.name: s for s in s_list}
        return out

    def calc_all(self, **parameter):
        """
        Calls calc_all(**parameter) of every sample and appends the results
        of each sample to self.results; the rows are named after the sample.

        Returns
        -------
           self.results
        """
        for sample in self.sample_list:
            label = sample.name
            sample.calc_all(**parameter)
            results = sample.results
            rpdata = RockPyData(column_names=results.column_names,
                                data=results.data, row_names=[label for i in results.data])
            if self.results is None:
                self.results = rpdata
            else:
                self.results = self.results.append_rows(rpdata)
        return self.results

    def average_results(self, **parameter):
        """
        makes averages of all calculations for all samples in group. Only samples with same series are averaged

        prams: parameter are calculation parameters, has to be a dictionary.
        The entry 'substfunc' (default 'mean') is taken out and passed to
        condense.

        Returns
        -------
           dict or None
              {stype: condensed results, one row per series value}; None if
              there are no series results at all
        """
        substfunc = parameter.pop('substfunc', 'mean')
        out = None
        stype_results = self.stype_results(**parameter)
        for stype in stype_results:
            for sval in sorted(stype_results[stype].keys()):
                aux = stype_results[stype][sval]
                aux.define_alias('variable', 'stype ' + stype)
                aux = condense(aux, substfunc=substfunc)
                if out is None:
                    out = {}
                # every stype gets its own entry; the first value of a series
                # type starts it, the following ones are appended
                if stype in out:
                    out[stype] = out[stype].append_rows(aux)
                else:
                    out[stype] = aux
        return out

    def __add__(self, other):
        """
        Returns a new group with the samples of both groups.

        NOTE(review): the samples of self go through add_samples, the samples
        of other are only put into the samples dictionary -- confirm that
        this difference is intended.
        """
        self_copy = SampleGroup(sample_list=self.sample_list)
        self_copy.samples.update(other.samples)
        return self_copy

    def _mlist_to_tdict(self, mlist):
        """
        takes a list of measurements looks for common stypes

        Returns
        -------
           dict
              {stype: list of the measurements having this stype}
        """
        # m.stypes is a collection per measurement; flatten before building
        # the set (a set of lists cannot be built)
        stypes = sorted({stype for m in mlist for stype in m.stypes})
        return {stype: [m for m in mlist if stype in m.stypes] for stype in stypes}

    def get_measurements(self,
                         snames=None,
                         mtypes=None,
                         series=None,
                         stypes=None, svals=None, sval_range=None,
                         mean=False,
                         invert=False,
                         **options):
        """
        Wrapper, for finding measurements, calls get_samples first and sample.get_measurements

        Returns the measurements of all matching samples in one list.
        """
        samples = self.get_samples(snames, mtypes, stypes, svals, sval_range)
        out = []
        for sample in samples:
            try:
                out.extend(sample.get_measurements(mtypes=mtypes,
                                                   series=series,
                                                   stypes=stypes, svals=svals, sval_range=sval_range,
                                                   mean=mean,
                                                   invert=invert,
                                                   ))
            except TypeError:
                # NOTE(review): presumably skips samples whose get_measurements
                # returns None; this also hides any other TypeError
                pass
        return out

    def delete_measurements(self, sname=None, mtype=None, stype=None, sval=None, sval_range=None):
        """
        deletes measurements according to criteria
        """
        samples = self.get_samples(snames=sname, mtypes=mtype, stypes=stype, svals=sval,
                                   sval_range=sval_range)  # search for samples with measurement fitting criteria
        for sample in samples:
            sample.remove_measurements(mtypes=mtype, stypes=stype, svals=sval,
                                       sval_range=sval_range)  # individually delete measurements from samples

    @staticmethod
    def _series_values(sample, stypes):
        """
        list of the series values of a sample for the given stypes

        Without stypes all series types of the sample are used; stypes the
        sample does not have are skipped.
        """
        if not stypes:
            stypes = sample.stypes
        sval_dict = sample.stype_sval_dict
        values = []
        for stype in stypes:
            if stype in sval_dict:
                values.extend(sval_dict[stype])
        return values

    def get_samples(self, snames=None, mtypes=None, stypes=None, svals=None, sval_range=None):
        """
        Primary search function for all parameters

        The filters are applied one after another. A sample passes a filter
        if it matches at least one of the given values and is returned only
        once.

        Parameters
        ----------
           snames: list, str
              list of names or a single name of the sample to be retrieved
           mtypes: list, str
              measurement type(s) the sample must have
           stypes: list, str
              series type(s) the sample must have
           svals: list, float
              series value(s), searched in the series given by stypes (all
              series of the sample if no stypes are given)
           sval_range: list, float
              [min, max] of the series value; a single number means [0, number]

        Returns
        -------
           list
              the matching samples; an empty list if no sample matches svals

        Raises
        ------
           KeyError
              if a name is unknown or no sample is left after the mtypes,
              stypes or sval_range filter. This includes a group without
              samples.
        """
        if snames:
            snames = _to_list(snames)
            out = []
            for s in snames:
                try:
                    out.append(self.samples[s])
                except KeyError:
                    raise KeyError('RockPy.sample_group does not contain sample << %s >>' % s)
        else:
            out = self.sample_list

        if mtypes:
            mtypes = _to_list(mtypes)
            out = [s for s in out if any(mtype in s.mtypes for mtype in mtypes)]

        # also raised for an empty group when no mtypes are given
        if len(out) == 0:
            raise KeyError('RockPy.sample_group does not contain sample with mtypes: << %s >>' % mtypes)

        if stypes:
            stypes = _to_list(stypes)
            out = [s for s in out if any(stype in s.stypes for stype in stypes)]
            if len(out) == 0:
                raise KeyError('RockPy.sample_group does not contain sample with stypes: << %s >>' % stypes)

        if svals:
            requested = svals  # as given by the caller, for the log message
            svals = _to_list(svals)
            out = [s for s in out
                   if any(sval in self._series_values(s, stypes) for sval in svals)]
            if len(out) == 0:
                self.log.error(
                    'RockPy.sample_group does not contain sample with (stypes, svals) pair: << %s, %s >>' % (
                        str(stypes), str(requested)))
                return []

        if sval_range:
            if not isinstance(sval_range, list):
                sval_range = [0, sval_range]
            elif len(sval_range) == 1:
                sval_range = [0] + sval_range

            lower, upper = min(sval_range), max(sval_range)
            out = [s for s in out
                   if any(lower <= tv <= upper for tv in self._series_values(s, stypes))]

            if len(out) == 0:
                # the range is reported with %s, it is a list
                raise KeyError(
                    'RockPy.sample_group does not contain sample with (stypes, sval_range) pair: << %s, %s >>' % (
                        stypes, sval_range))

        # out cannot be empty here, every filter above handles that itself
        return out

    def create_mean_sample(self,
                           reference=None,
                           ref_dtype='mag', vval=None,
                           norm_dtypes='all',
                           norm_method='max',
                           interpolate=True,
                           substfunc='mean',
                           ):
        """
        Creates a mean sample out of all samples

        The mean sample gets the measurements of all samples of the group.
        For every (mtype, stype, sval) combination of its mdict one mean
        measurement is calculated and appended to its mean_measurements.
        The mtypes mass, diameter, height, volume, x_len, y_len and z_len
        are skipped. The measurements are normalized first if reference or
        vval is given.

        :param reference: passed to measurement.normalize
        :param ref_dtype: passed to measurement.normalize
        :param vval: passed to measurement.normalize
        :param norm_dtypes: passed to measurement.normalize
        :param norm_method: passed to measurement.normalize
        :param interpolate: passed to mean_measurement
        :param substfunc: passed to mean_measurement
        :return: the mean sample
        """

        # create new sample_obj
        mean_sample = Sample(name='mean ' + self.name)
        # get all measurements from all samples in sample group and add to mean sample
        mean_sample.measurements = [m for s in self.sample_list for m in s.measurements]
        mean_sample.populate_mdict()

        for mtype in sorted(mean_sample.mdict['mtype_stype_sval']):
            if mtype in ['mass', 'diameter', 'height', 'volume', 'x_len', 'y_len', 'z_len']:
                continue
            for stype in sorted(mean_sample.mdict['mtype_stype_sval'][mtype]):
                for sval in sorted(mean_sample.mdict['mtype_stype_sval'][mtype][stype]):
                    series = None  # initialize

                    # normalize if needed
                    if reference or vval:
                        for m in mean_sample.mdict['mtype_stype_sval'][mtype][stype][sval]:
                            m = m.normalize(
                                reference=reference, ref_dtype=ref_dtype,
                                norm_dtypes=norm_dtypes,
                                vval=vval, norm_method=norm_method)
                        # NOTE(review): the series is taken from the last
                        # normalized measurement only -- confirm
                        series = m.get_series(stypes=stype, svals=sval)[0]

                    # calculating the mean of all measurements
                    M = mean_sample.mean_measurement(mtype=mtype, stype=stype, sval=sval,
                                                     substfunc=substfunc,
                                                     interpolate=interpolate,
                                                     # reference=reference, ref_dtype=ref_dtype,
                                                     # norm_dtypes=norm_dtypes,
                                                     # vval=vval, norm_method=norm_method,
                                                     )
                    if series:
                        M.add_sval(series_obj=series)
                    if reference or vval:
                        M.is_normalized = True
                        M.norm = [reference, ref_dtype, vval, norm_method, np.nan]

                    mean_sample.mean_measurements.append(M)

        mean_sample.is_mean = True  # set is_mean flag after all measurements are created
        return mean_sample

    def __get_variable_list(self, rpdata_list):
        """sorted list of the distinct 'variable' values of all given data objects"""
        out = []
        for rp in rpdata_list:
            out.extend(rp['variable'].v)
        return self.__sort_list_set(out)

    def __sort_list_set(self, values):
        """
        returns a sorted list of non duplicate values
        :param values:
        :return:
        """
        return sorted(list(set(values)))

    ''' INFODICT '''

    def __create_info_dict(self):
        """
        creates all info dictionaries

        Returns
        -------
           dict
              One empty dictionary for every ordering of one, two or all
              three of 'mtype', 'stype' and 'sval'; the key is the ordering
              joined with '_' (e.g. 'mtype_stype').
        """
        d = ['mtype', 'stype', 'sval']
        keys = ['_'.join(i) for n in range(1, len(d) + 1) for i in itertools.permutations(d, n)]
        out = {i: {} for i in keys}
        return out

    # @profile()
    def add_s2_info_dict(self, s):
        """
        Adds a sample to the infodict.

        For every key of the info_dict the nested dictionaries of
        s.info_dict[key] are followed. On the last level of the key the
        sample is appended to a list (once), on the levels before a
        dictionary is created.

        Parameters
        ----------
           s: RockPySample
              The sample that should be added to the dictionary
        """

        keys = self.info_dict.keys()  # all possible keys

        for key in keys:
            # split keys into levels
            split_keys = key.split('_')
            for i, level in enumerate(split_keys):
                # i == level number, n == maximal level
                # if i == n _> last level -> list instead of dict
                n = len(split_keys) - 1

                # level 0
                for e0 in s.info_dict[key]:
                    # if only 1 level
                    if i == n == 0:
                        # create key with empty list
                        self._info_dict[key].setdefault(e0, list())
                        # add sample if not already in list
                        if not s in self._info_dict[key][e0]:
                            self._info_dict[key][e0].append(s)
                        continue
                    else:
                        # if not last entry generate key: dict() pair
                        self._info_dict[key].setdefault(e0, dict())

                    # level 1
                    for e1 in s.info_dict[key][e0]:
                        if i == n == 1:
                            self._info_dict[key][e0].setdefault(e1, list())
                            if not s in self._info_dict[key][e0][e1]:
                                self._info_dict[key][e0][e1].append(s)
                            continue
                        elif i > 0:
                            self._info_dict[key][e0].setdefault(e1, dict())

                            # level 2
                            for e2 in s.info_dict[key][e0][e1]:
                                if i == n == 2:
                                    self._info_dict[key][e0][e1].setdefault(e2, list())
                                    if not s in self._info_dict[key][e0][e1][e2]:
                                        self._info_dict[key][e0][e1][e2].append(s)
                                    continue
                                elif i > 1:
                                    self._info_dict[key][e0][e1].setdefault(e2, dict())

    def recalc_info_dict(self):
        """
        Recalculates the info_dictionary with information of all samples and their corresponding measurements

        """
        self._info_dict = self.__create_info_dict()
        # explicit loop: map() is lazy on Python 3 and would add nothing
        for s in self.slist:
            self.add_s2_info_dict(s)

    @property
    def info_dict(self):
        """
        Property for easy access of info_dict. If '_info_dict' has not been created, it will create one.
        """
        if not hasattr(self, '_info_dict'):
            # creates the empty dictionaries and adds all samples
            self.recalc_info_dict()
        return self._info_dict

Exemple #9

0

Afficher le fichier

class SampleGroup(object):
    """
    Container for Samples, has special calculation methods

    Samples are kept in ``self.samples`` (sample name -> sample object).
    ``self._info_dict`` is a nested lookup built from the ``info_dict`` of every
    added sample; it is not pickled and is rebuilt lazily by the ``info_dict``
    property.
    """
    log = logging.getLogger(__name__)

    # number of groups created so far, used to generate default names
    count = 0

    def __init__(self,
                 name=None,
                 sample_list=None,
                 sample_file=None,
                 **options):
        """
        Parameters
        ----------
           name: str
              name of the group, defaults to 'SampleGroup <running number>'
           sample_list: sample or list of samples
              samples that are added with add_samples
           sample_file: str
              path of a tab separated file handed to import_multiple_samples
           options:
              passed on to import_multiple_samples
        """
        SampleGroup.count += 1

        SampleGroup.log.info('CRATING new << samplegroup >>')

        # ## initialize
        if name is None:
            name = 'SampleGroup %04i' % (self.count)

        self.name = name
        self.samples = {}
        self.results = None

        self.color = None

        if sample_file:
            self.import_multiple_samples(sample_file, **options)

        self._info_dict = self.__create_info_dict()

        if sample_list:
            self.add_samples(sample_list)

    def __getstate__(self):
        '''
        returned dict will be pickled, only name, samples and results are kept
        :return: dict
        '''
        # items() instead of iteritems(): works on Python 2 and Python 3
        return {
            k: v
            for k, v in self.__dict__.items()
            if k in ('name', 'samples', 'results')
        }

    def __setstate__(self, state):
        '''
        restores the pickled attributes
        :param state: dict as returned by __getstate__
        '''
        self.__dict__.update(state)
        # 'color' is dropped by __getstate__; restore the __init__ default so
        # the attribute exists on unpickled groups as well
        self.__dict__.setdefault('color', None)
        # self.recalc_info_dict()

    def __repr__(self):
        # return super(SampleGroup, self).__repr__()
        return "<RockPy.SampleGroup - << %s - %i samples >> >" % (
            self.name, len(self.sample_names))

    def __getitem__(self, item):
        """
        Returns a sample by name or, failing that, by position in sample_list

        Raises
        ------
           KeyError
              if item is neither a known name nor a valid index/slice
        """
        try:
            return self.samples[item]
        except (KeyError, TypeError):
            # unknown name or unhashable item (e.g. a slice) -> try as index
            pass
        try:
            return self.sample_list[item]
        except (IndexError, TypeError):
            # list indexing never raises KeyError, so translate its errors
            raise KeyError('SampleGroup has no Sample << %s >>' % item)

    def import_multiple_samples(self,
                                sample_file,
                                length_unit='mm',
                                mass_unit='mg',
                                **options):
        """
        imports a tab separated file with sample_names masses and dimensions and creates the sample_objects

        The first remaining row is the header, the first column holds the
        sample name and all other columns must be convertible to float.
        Rows containing a field equal to '#' and empty rows are ignored.

        :param sample_file: path of the file
        :param length_unit: passed on to Sample
        :param mass_unit: passed on to Sample
        :return:
        """
        # context manager so the file handle is closed again
        with open(sample_file) as handle:
            rows = [
                row for row in csv.reader(handle, delimiter='\t')
                if row and '#' not in row
            ]
        if not rows:
            self.log.warning('NO data found in << %s >>' % sample_file)
            return

        header = rows[0]
        # parse everything first, so a malformed row raises before any sample is created
        d_dict = {
            row[0]: {header[j].lower(): float(row[j])
                     for j in range(1, len(row))}
            for row in rows[1:]
        }
        for sample in d_dict:
            values = d_dict[sample]
            S = Sample(sample,
                       mass=values.get('mass', None),
                       height=values.get('height', None),
                       diameter=values.get('diameter', None),
                       mass_unit=mass_unit,
                       length_unit=length_unit)
            self.samples[sample] = S

    def pop_sample(self, sample_name):
        """
        remove samples from sample_group will take str(sample_name), list(sample_name)

        Unknown names are ignored. Returns the group itself.
        """
        if not isinstance(sample_name, list):
            sample_name = [sample_name]
        for sample in sample_name:
            self.samples.pop(sample, None)
        return self

    # ### DATA properties
    @property
    def sample_list(self):
        """
        alias for slist
        """
        return self.slist

    @property
    def sample_names(self):
        """
        sorted list of the keys of the sample dictionary
        """
        return sorted(self.samples.keys())

    def add_samples(self, s_list):
        """
        Adds a sample to the sample dictionary and adds the sample_group to sample.sgroups

        Parameters
        ----------
           s_list: single item or list
              single items get transformed to list

        Note
        ----
           Uses _to_list for list conversion
        """

        s_list = _to_list(s_list)
        self.samples.update(self._sdict_from_slist(s_list=s_list))
        self.log.info('ADDING sample(s) %s' % [s.name for s in s_list])
        for s in s_list:
            s.sgroups.append(self)
            self.add_s2_info_dict(s)

    def remove_samples(self, s_list):
        """
        Removes a sample from the sgroup.samples dictionary and removes the sgroup from sample.sgroups

        Parameters
        ----------
           s_list: single item or list
              sample name(s), single items get transformed to list

        Note
        ----
           Uses _to_list for list conversion
        """
        # NOTE(review): the info dict is not refreshed here, removed samples
        # stay in it until recalc_info_dict is called - confirm this is wanted

        s_list = _to_list(s_list)
        for s in s_list:
            self.sdict[s].sgroups.remove(self)
            self.samples.pop(s)

    # ## components of container
    # lists
    @property
    def slist(self):
        """
        list of all samples, ordered by their key in the sample dictionary
        """
        return [self.samples[name] for name in sorted(self.samples)]

    @property
    def sdict(self):
        """
        dictionary {sample.name: sample} of all samples
        """
        return {s.name: s for s in self.sample_list}

    @property
    def mtypes(self):
        """
        looks through all samples and returns the sorted measurement types
        """
        # single definition; the class used to define this property twice and
        # only this (measurement based) variant was ever in effect
        return self.__sort_list_set(
            [m.mtype for s in self.sample_list for m in s.measurements])

    @property
    def stypes(self):
        """
        looks through all samples and returns the sorted series types
        """
        return self.__sort_list_set(
            [stype for s in self.sample_list for stype in s.stypes])

    @property
    def svals(self):
        """
        looks through all samples and returns the sorted series values
        """
        out = []
        for sample in self.sample_list:
            out.extend(sample.svals)
        return self.__sort_list_set(out)

    # measurement: samples
    @property
    def mtype_sdict(self):
        """
        dictionary {mtype: samples found by get_samples(mtypes=mtype)}
        """
        return {mtype: self.get_samples(mtypes=mtype) for mtype in self.mtypes}

    # mtype: stypes
    @property
    def mtype_stype_dict(self):
        """
        returns a dictionary with the sorted series types within each measurement type
        """
        out = {}
        for mtype in self.mtypes:
            found = [
                t.stype
                for s in self.get_samples(mtypes=mtype)
                for t in s.mtype_tdict[mtype]
            ]
            out[mtype] = self.__sort_list_set(found)
        return out

    # mtype: svals
    @property
    def mtype_svals_dict(self):
        """
        returns a dictionary with the sorted series values within each measurement type
        """
        out = {}
        for mtype in self.mtypes:
            found = [
                t.value
                for s in self.get_samples(mtypes=mtype)
                for t in s.mtype_tdict[mtype]
            ]
            out[mtype] = self.__sort_list_set(found)
        return out

    @property
    def stype_sval_dict(self):
        """
        dictionary {stype: sorted series values of that stype}
        """
        return {
            stype: self._get_all_series_values(stype)
            for stype in self.stypes
        }

    @property
    def mtype_dict(self):
        """
        dictionary {mtype: measurements of that type from all samples}
        """
        return {
            mtype: [m for s in self.sample_list
                    for m in s.get_measurements(mtype)]
            for mtype in self.mtypes
        }

    def _get_all_series_values(self, stype):
        """
        sorted, duplicate free values of all series with the given stype
        """
        return self.__sort_list_set([
            series.value
            for s in self.sample_list
            for m in s.measurements
            for series in m.series
            if series.stype == stype
        ])

    def stype_results(self, **parameter):
        """
        Groups the results by series type and (rounded) series value

        Calculates the results first (calc_all) if there are none.

        Returns
        -------
           dict
              {stype: {series value rounded to 2 decimals: filtered results}}
              for every results column with 'stype' in its name
        """
        if not self.results:
            self.results = self.calc_all(**parameter)
        stype_columns = [c for c in self.results.column_names if 'stype' in c]
        out = {
            c.split()[1]: {float(v): None
                           for v in np.round(self.results[c].v, 2)}
            for c in stype_columns
        }

        for stype in out:
            # compare the rounded values: the keys are rounded, so comparing
            # against the raw column would miss rows like 1.2300001 for key 1.23
            rounded = np.round(self.results['stype ' + stype].v, 2)
            for sval in out[stype]:
                idx = np.where(rounded == sval)[0]
                out[stype][sval] = self.results.filter_idx(idx)
        return out

    def _sdict_from_slist(self, s_list):
        """
        creates a dictionary with s.name:s for each sample in a list of samples

        Parameters
        ----------
           s_list: sample or list
        Returns
        -------
           dict
              dictionary with {sample.name : sample} for each sample in s_list

        Note
        ----
           uses _to_list for item -> list conversion
        """
        return {s.name: s for s in _to_list(s_list)}

    def calc_all(self, **parameter):
        """
        Calls calc_all on every sample and collects the sample results

        Every result row is labeled with the name of its sample. The rows are
        appended to self.results, which is also returned.
        """
        # NOTE(review): self.results is not reset, so calling this twice
        # appends every sample a second time - confirm this is intended
        for sample in self.sample_list:
            label = sample.name
            sample.calc_all(**parameter)
            results = sample.results
            rpdata = RockPyData(column_names=results.column_names,
                                data=results.data,
                                row_names=[label for i in results.data])
            if self.results is None:
                self.results = rpdata
            else:
                self.results = self.results.append_rows(rpdata)
        return self.results

    def average_results(self, **parameter):
        """
        makes averages of all calculations for all samples in group. Only samples with same series are averaged

        Parameters
        ----------
           parameter:
              calculation parameters; 'substfunc' (default 'mean') is removed
              from them and handed to condense

        Returns
        -------
           dict or None
              {stype: condensed results, one row block per series value},
              None if there are no series results
        """
        substfunc = parameter.pop('substfunc', 'mean')
        out = {}
        stype_results = self.stype_results(**parameter)
        for stype in stype_results:
            for sval in sorted(stype_results[stype].keys()):
                aux = stype_results[stype][sval]
                aux.define_alias('variable', 'stype ' + stype)
                aux = condense(aux, substfunc=substfunc)
                # start a new entry per stype; checking only for an empty
                # result raised a KeyError as soon as a second stype showed up
                if stype in out:
                    out[stype] = out[stype].append_rows(aux)
                else:
                    out[stype] = aux
        return out if out else None

    def __add__(self, other):
        """
        new group with the samples of both groups
        """
        # NOTE(review): the samples of `other` bypass add_samples, so they are
        # neither in the info dict of the new group nor is the new group in
        # their sgroups - confirm this is intended
        self_copy = SampleGroup(sample_list=self.sample_list)
        self_copy.samples.update(other.samples)
        return self_copy

    def _mlist_to_tdict(self, mlist):
        """
        takes a list of measurements looks for common stypes

        Returns
        -------
           dict
              {stype: measurements of mlist having that stype}
        """
        # flatten first: m.stypes is a collection and can not be a set member
        stypes = sorted(set(stype for m in mlist for stype in m.stypes))
        return {
            stype: [m for m in mlist if stype in m.stypes]
            for stype in stypes
        }

    def get_measurements(self,
                         snames=None,
                         mtypes=None,
                         series=None,
                         stypes=None,
                         svals=None,
                         sval_range=None,
                         mean=False,
                         invert=False,
                         **options):
        """
        Wrapper, for finding measurements, calls get_samples first and sample.get_measurements

        Returns
        -------
           list
              measurements of all matching samples
        """
        samples = self.get_samples(snames, mtypes, stypes, svals, sval_range)
        out = []
        for sample in samples:
            try:
                out.extend(
                    sample.get_measurements(
                        mtypes=mtypes,
                        series=series,
                        stypes=stypes,
                        svals=svals,
                        sval_range=sval_range,
                        mean=mean,
                        invert=invert,
                    ))
            except TypeError:
                # best effort: e.g. a sample returning None is skipped
                pass
        return out

    def delete_measurements(self,
                            sname=None,
                            mtype=None,
                            stype=None,
                            sval=None,
                            sval_range=None):
        """
        deletes measurements according to criteria
        """
        # search for samples with measurement fitting criteria
        samples = self.get_samples(snames=sname,
                                   mtypes=mtype,
                                   stypes=stype,
                                   svals=sval,
                                   sval_range=sval_range)
        # individually delete measurements from samples
        for sample in samples:
            sample.remove_measurements(mtypes=mtype,
                                       stypes=stype,
                                       svals=sval,
                                       sval_range=sval_range)

    @staticmethod
    def _filter_by_sval(samples, stypes, accept):
        """
        returns the samples (each one at most once) that have at least one
        series value v with accept(v) being true. Only the given stypes are
        inspected, all stypes of a sample if stypes is empty or None.
        """
        kept = []
        for s in samples:
            sval_dict = s.stype_sval_dict
            for stype in (stypes if stypes else list(sval_dict)):
                # a sample does not need to have every requested stype
                if stype in sval_dict and any(
                        accept(v) for v in sval_dict[stype]):
                    kept.append(s)
                    break
        return kept

    def get_samples(self,
                    snames=None,
                    mtypes=None,
                    stypes=None,
                    svals=None,
                    sval_range=None):
        """
        Primary search function for all parameters

        The filters are applied one after another, a sample is returned at most
        once.

        Parameters
        ----------
           snames: list, str
              list of names or a single name of the sample to be retrieved
           mtypes: list, str
              keep samples having at least one of these measurement types
           stypes: list, str
              keep samples having at least one of these series types
           svals: list, float
              keep samples having one of these series values (in stypes, or in
              any stype if no stypes are given)
           sval_range: list, float
              keep samples having a series value within [min, max] of the
              range; a single number x means [0, x]

        Returns
        -------
           list
              matching samples, empty list if the svals filter matches nothing

        Raises
        ------
           KeyError
              if a name is unknown or no sample is left after the mtypes,
              stypes or sval_range filter
        """
        # only used for the log message below
        t_value = np.nan if svals is None else svals

        out = []

        if snames:
            snames = _to_list(snames)
            for s in snames:
                try:
                    out.append(self.samples[s])
                except KeyError:
                    raise KeyError(
                        'RockPy.sample_group does not contain sample << %s >>'
                        % s)
            if len(out) == 0:
                raise KeyError(
                    'RockPy.sample_group does not contain any samples')
        else:
            out = self.sample_list

        if mtypes:
            mtypes = _to_list(mtypes)
            # any(): a sample matching several mtypes is kept only once
            out = [
                s for s in out if any(mtype in s.mtypes for mtype in mtypes)
            ]

        # checked even without an mtypes filter, i.e. an empty group raises
        if len(out) == 0:
            raise KeyError(
                'RockPy.sample_group does not contain sample with mtypes: << %s >>'
                % (mtypes, ))

        if stypes:
            stypes = _to_list(stypes)
            out = [
                s for s in out if any(stype in s.stypes for stype in stypes)
            ]
            if len(out) == 0:
                raise KeyError(
                    'RockPy.sample_group does not contain sample with stypes: << %s >>'
                    % (stypes, ))

        # compare against None first, a series value of 0 is a valid filter
        # (assumes _to_list wraps a single value into a list - TODO confirm)
        if svals is not None:
            svals = _to_list(svals)
        if svals:
            out = self._filter_by_sval(out, stypes, lambda v: v in svals)
            if len(out) == 0:
                self.log.error(
                    'RockPy.sample_group does not contain sample with (stypes, svals) pair: << %s, %s >>'
                    % (str(stypes), str(t_value)))
                return []

        if sval_range:
            if not isinstance(sval_range, (list, tuple)):
                sval_range = [0, sval_range]
            elif len(sval_range) == 1:
                sval_range = [0] + list(sval_range)

            lower, upper = min(sval_range), max(sval_range)
            out = self._filter_by_sval(out, stypes,
                                       lambda v: lower <= v <= upper)

            if len(out) == 0:
                # report the range itself; %s also copes with lists
                raise KeyError(
                    'RockPy.sample_group does not contain sample with (stypes, sval_range) pair: << %s, %s >>'
                    % (stypes, sval_range))

        return out

    def create_mean_sample(
        self,
        reference=None,
        ref_dtype='mag',
        vval=None,
        norm_dtypes='all',
        norm_method='max',
        interpolate=True,
        substfunc='mean',
    ):
        """
        Creates a mean sample out of all samples

        All measurements of all samples are collected in a new sample called
        'mean <group name>'. For every (mtype, stype, sval) combination of that
        sample one mean measurement is calculated and appended to its
        mean_measurements. If reference or vval is given, the measurements are
        normalized first.

        :param reference: passed to measurement.normalize
        :param ref_dtype: passed to measurement.normalize
        :param vval: passed to measurement.normalize
        :param norm_dtypes: passed to measurement.normalize
        :param norm_method: passed to measurement.normalize
        :param interpolate: passed to sample.mean_measurement
        :param substfunc: passed to sample.mean_measurement
        :return: the mean sample
        """

        # create new sample_obj
        mean_sample = Sample(name='mean ' + self.name)
        # get all measurements from all samples in sample group and add to mean sample
        mean_sample.measurements = [
            m for s in self.sample_list for m in s.measurements
        ]
        mean_sample.populate_mdict()

        # measurement types that are skipped (presumably the sample parameters
        # which can not be averaged - TODO confirm)
        skipped = ('mass', 'diameter', 'height', 'volume', 'x_len', 'y_len',
                   'z_len')

        for mtype in sorted(mean_sample.mdict['mtype_stype_sval']):
            if mtype in skipped:
                continue
            for stype in sorted(mean_sample.mdict['mtype_stype_sval'][mtype]):
                for sval in sorted(
                        mean_sample.mdict['mtype_stype_sval'][mtype][stype]):
                    series = None  # initialize

                    # normalize if needed
                    if reference or vval:
                        for m in mean_sample.mdict['mtype_stype_sval'][mtype][
                                stype][sval]:
                            m = m.normalize(reference=reference,
                                            ref_dtype=ref_dtype,
                                            norm_dtypes=norm_dtypes,
                                            vval=vval,
                                            norm_method=norm_method)
                        # the series is taken from the last normalized measurement
                        series = m.get_series(stypes=stype, svals=sval)[0]

                    # calculating the mean of all measurements
                    M = mean_sample.mean_measurement(
                        mtype=mtype,
                        stype=stype,
                        sval=sval,
                        substfunc=substfunc,
                        interpolate=interpolate,
                    )
                    if series:
                        M.add_sval(series_obj=series)
                    if reference or vval:
                        M.is_normalized = True
                        M.norm = [
                            reference, ref_dtype, vval, norm_method, np.nan
                        ]

                    mean_sample.mean_measurements.append(M)

        mean_sample.is_mean = True  # set is_mean flag after all measurements are created
        return mean_sample

    def __get_variable_list(self, rpdata_list):
        """
        sorted, duplicate free values of the 'variable' column of all given data objects
        """
        out = []
        for rp in rpdata_list:
            out.extend(rp['variable'].v)
        return self.__sort_list_set(out)

    def __sort_list_set(self, values):
        """
        returns a sorted list of non duplicate values
        :param values: iterable of hashable, sortable items
        :return: list
        """
        return sorted(set(values))

    ''' INFODICT '''

    def __create_info_dict(self):
        """
        creates all info dictionaries

        Returns
        -------
           dict
              Dictionary with an empty dictionary for each permutation of one,
              two or all three of mtype, stype and sval, e.g. 'mtype',
              'stype_mtype' or 'sval_mtype_stype'.
        """
        d = ['mtype', 'stype', 'sval']
        keys = [
            '_'.join(combination) for n in range(1, len(d) + 1)
            for combination in itertools.permutations(d, n)
        ]
        return {key: {} for key in keys}

    @staticmethod
    def _merge_info(target, source, depth, s):
        """
        merges one info_dict branch of a sample into the same branch of the group

        `depth` is the number of dictionary levels left above the lists of
        samples: entries of `source` become dictionaries in `target` while
        depth > 0 and lists holding `s` (only once) when depth == 0.
        """
        for entry in source:
            if depth == 0:
                members = target.setdefault(entry, list())
                if s not in members:
                    members.append(s)
            else:
                SampleGroup._merge_info(target.setdefault(entry, dict()),
                                        source[entry], depth - 1, s)

    # @profile()
    def add_s2_info_dict(self, s):
        """
        Adds a sample to the infodict.

        For each key of the info dict (e.g. 'mtype_stype') the entries of
        s.info_dict[key] are copied level by level; the last level of the key
        holds the list of samples.

        Parameters
        ----------
           s: RockPySample
              The sample that should be added to the dictionary
        """
        group_info = self.info_dict
        sample_info = s.info_dict

        for key in list(group_info):
            # 'mtype' -> lists directly, 'mtype_stype' -> one dict level, ...
            depth = key.count('_')
            self._merge_info(group_info[key], sample_info[key], depth, s)

    def recalc_info_dict(self):
        """
        Recalculates the info_dictionary with information of all samples and their corresponding measurements

        """
        self._info_dict = self.__create_info_dict()
        # explicit loop: map() is lazy on Python 3 and would add nothing
        for s in self.slist:
            self.add_s2_info_dict(s)

    @property
    def info_dict(self):
        """
        Property for easy access of info_dict. If '_info_dict' has not been created, it will create one.
        """
        if not hasattr(self, '_info_dict'):
            # recalc_info_dict creates the empty dictionary itself
            self.recalc_info_dict()
        return self._info_dict