Esempio n. 1
0
    def test_getZScores(self):
        """Check z-scores and modified z-scores at two sample positions.

        The input data and the expected numbers are taken from the C++ test.
        """
        samples = numpy.array(
            [12, 13, 9, 18, 7, 9, 14, 16, 10, 12,
             7, 13, 14, 19, 10, 16, 12, 16, 19, 11],
            dtype=numpy.float64)

        # Each pair is (index into the result, expected value at that index).
        plain = Stats.getZscore(samples)
        for position, expected in ((4, 1.63977), (6, 0.32235)):
            self.assertAlmostEqual(expected, plain[position], places=4)

        modified = Stats.getModifiedZscore(samples)
        for position, expected in ((4, 1.23658), (6, 0.33725)):
            self.assertAlmostEqual(expected, modified[position], places=4)
Esempio n. 2
0
    def test_getZScores(self):
        """Verify Stats.getZscore and Stats.getModifiedZscore on fixed data.

        Input values and reference results come from the C++ test.
        """
        raw = (12, 13, 9, 18, 7, 9, 14, 16, 10, 12,
               7, 13, 14, 19, 10, 16, 12, 16, 19, 11)
        data = numpy.array(list(raw), dtype=numpy.float64)

        # Reference values keyed by the element index that is checked.
        expected_z = ((4, 1.63977), (6, 0.32235))
        expected_mod_z = ((4, 1.23658), (6, 0.33725))

        z_result = Stats.getZscore(data)
        for idx, reference in expected_z:
            self.assertAlmostEqual(reference, z_result[idx], places=4)

        mod_result = Stats.getModifiedZscore(data)
        for idx, reference in expected_mod_z:
            self.assertAlmostEqual(reference, mod_result[idx], places=4)
    def PyExec(self):
        """Build a table holding summary statistics for each numeric column.

        Reads the workspace named by the 'InputWorkspace' property, creates an
        empty table named by 'OutputWorkspace' with a leading 'statistic'
        string column, and adds one float column per input column whose
        cells all convert with ``float``. Columns that raise ValueError are
        reported through ``logger.notice`` and skipped. One row is added per
        statistic, and the table is set as the 'OutputWorkspace' property.
        """
        source_table = mtd[self.getPropertyValue('InputWorkspace')]
        result_name = self.getPropertyValue('OutputWorkspace')

        result_table = ms.CreateEmptyTableWorkspace(OutputWorkspace=result_name)
        result_table.addColumn('str', 'statistic')

        # statistic name -> {column name -> value}
        stat_keys = ('standard_deviation', 'maximum', 'minimum', 'mean',
                     'median')
        stats = {key: dict() for key in stat_keys}

        for column in source_table.getColumnNames():
            try:
                # float() raises ValueError for non-numeric cells
                numbers = np.array(
                    [float(cell) for cell in source_table.column(column)])
                summary = _stats_to_dict(Stats.getStatistics(numbers))
                for key in stats:
                    stats[key][column] = summary[key]
                result_table.addColumn('float', column)
            except ValueError:
                logger.notice('Column \'%s\' is not numerical, skipping' % column)

        # One output row per statistic; the 'statistic' cell labels the row.
        for label, per_column in iteritems(stats):
            row = dict(per_column)
            row['statistic'] = label
            result_table.addRow(row)

        self.setProperty('OutputWorkspace', result_table)
    def PyExec(self):
        """Build a table holding summary statistics for each numeric column.

        Reads the workspace named by the 'InputWorkspace' property, creates an
        empty table named by 'OutputWorkspace' with a leading 'statistic'
        string column, and adds one float column per input column whose
        cells all convert with ``float``. Columns that raise ValueError are
        reported through ``logger.notice`` and skipped. One row is added per
        statistic, and the table is set as the 'OutputWorkspace' property.
        """
        in_ws = mtd[self.getPropertyValue('InputWorkspace')]
        out_ws_name = self.getPropertyValue('OutputWorkspace')

        # Use the canonical spelling of the keyword ('OutputWorkspace'); the
        # previous 'OutputWOrkspace' was a typo.
        out_ws = ms.CreateEmptyTableWorkspace(OutputWorkspace=out_ws_name)

        out_ws.addColumn('str', 'statistic')

        # statistic name -> {column name -> value}
        stats = {
            'standard_deviation': dict(),
            'maximum': dict(),
            'minimum': dict(),
            'mean': dict(),
            'median': dict(),
        }

        for name in in_ws.getColumnNames():
            try:
                # float() raises ValueError for non-numeric cells
                col_stats = _stats_to_dict(
                    Stats.getStatistics(
                        np.array([float(v) for v in in_ws.column(name)])))
                for statname in stats:
                    stats[statname][name] = col_stats[statname]
                out_ws.addColumn('float', name)
            except ValueError:
                logger.notice('Column \'%s\' is not numerical, skipping' %
                              name)

        # One output row per statistic; the 'statistic' cell labels the row.
        for name, stat in stats.items():
            stat1 = dict(stat)
            stat1['statistic'] = name
            out_ws.addRow(stat1)

        # Hand back the populated table itself rather than only its name.
        self.setProperty('OutputWorkspace', out_ws)
Esempio n. 5
0
    def test_getStatistics_with_floats(self):
        """Check Stats.getStatistics on a small float array.

        The same expectations are verified twice: once on the unsorted data
        and once on the sorted data passed with ``sorted=True``.
        """
        data = numpy.array([17.2, 18.1, 16.5, 18.3, 12.6])
        stats = Stats.getStatistics(data)
        self.assertAlmostEqual(16.54, stats.mean, places=10)
        self.assertAlmostEqual(2.0733, stats.standard_deviation, places=4)
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(12.6, stats.minimum)
        self.assertEqual(18.3, stats.maximum)
        self.assertEqual(17.2, stats.median)

        data = numpy.sort(data)
        stats = Stats.getStatistics(data, sorted=True)
        self.assertAlmostEqual(16.54, stats.mean, places=10)
        self.assertAlmostEqual(2.0733, stats.standard_deviation, places=4)
        self.assertEqual(12.6, stats.minimum)
        self.assertEqual(18.3, stats.maximum)
        self.assertEqual(17.2, stats.median)
Esempio n. 6
0
    def test_getStatistics_with_floats(self):
        """Check Stats.getStatistics on a small float array.

        The same expectations are verified twice: once on the unsorted data
        and once on the sorted data passed with ``sorted=True``.
        """
        data = numpy.array([17.2, 18.1, 16.5, 18.3, 12.6])
        stats = Stats.getStatistics(data)
        self.assertAlmostEqual(16.54, stats.mean, places=10)
        self.assertAlmostEqual(2.0733, stats.standard_deviation, places=4)
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(12.6, stats.minimum)
        self.assertEqual(18.3, stats.maximum)
        self.assertEqual(17.2, stats.median)

        data = numpy.sort(data)
        stats = Stats.getStatistics(data, sorted=True)
        self.assertAlmostEqual(16.54, stats.mean, places=10)
        self.assertAlmostEqual(2.0733, stats.standard_deviation, places=4)
        self.assertEqual(12.6, stats.minimum)
        self.assertEqual(18.3, stats.maximum)
        self.assertEqual(17.2, stats.median)
Esempio n. 7
0
    def test_getZScores(self):
        """Check z-scores, modified z-scores and the legacy ``sorted`` flag.

        The input data and the expected numbers are taken from the C++ test.
        """
        samples = numpy.array(
            [12, 13, 9, 18, 7, 9, 14, 16, 10, 12,
             7, 13, 14, 19, 10, 16, 12, 16, 19, 11],
            dtype=numpy.float64)

        # Each pair is (index into the result, expected value at that index).
        z_expected = ((4, 1.63977), (6, 0.32235))
        mod_expected = ((4, 1.23658), (6, 0.33725))

        plain = Stats.getZscore(samples)
        for position, expected in z_expected:
            self.assertAlmostEqual(expected, plain[position], places=4)

        modified = Stats.getModifiedZscore(samples)
        for position, expected in mod_expected:
            self.assertAlmostEqual(expected, modified[position], places=4)

        # The sorted argument must keep working; drop this check once the
        # function is removed. sorted=True only ever affected the order.
        legacy = Stats.getZscore(samples, sorted=True)
        for position, expected in z_expected:
            self.assertAlmostEqual(expected, legacy[position], places=4)
Esempio n. 8
0
    def test_getZScores(self):
        """Verify getZscore (with and without ``sorted``) and getModifiedZscore.

        Input values and reference results come from the C++ test.
        """
        raw = (12, 13, 9, 18, 7, 9, 14, 16, 10, 12,
               7, 13, 14, 19, 10, 16, 12, 16, 19, 11)
        data = numpy.array(list(raw), dtype=numpy.float64)

        # Reference values keyed by the element index that is checked.
        reference_z = ((4, 1.63977), (6, 0.32235))
        reference_mod_z = ((4, 1.23658), (6, 0.33725))

        z_result = Stats.getZscore(data)
        for idx, reference in reference_z:
            self.assertAlmostEqual(reference, z_result[idx], places=4)

        mod_result = Stats.getModifiedZscore(data)
        for idx, reference in reference_mod_z:
            self.assertAlmostEqual(reference, mod_result[idx], places=4)

        # Keep exercising the sorted argument until the function is removed;
        # sorted=True only ever affected the order.
        sorted_result = Stats.getZscore(data, sorted=True)
        for idx, reference in reference_z:
            self.assertAlmostEqual(reference, sorted_result[idx], places=4)
Esempio n. 9
0
    def test_getMoments(self):
        """Check moments of a sampled Gaussian about the origin and the mean.

        A Gaussian with mean 5 and sigma 4 is sampled on 200 points spaced
        0.2 apart and centred on the mean. An int32 y-array must be rejected
        with ValueError; for the float y-array the first four moments are
        compared against their analytic values.
        """
        mean = 5.
        sigma = 4.
        deltaX = .2
        numX = 200
        # calculate to have same number of points left and right of function
        offsetX = mean - (.5 * deltaX * float(numX))
        # variance about origin
        expVar = mean * mean + sigma * sigma
        # skew about origin
        expSkew = mean * mean * mean + 3. * mean * sigma * sigma

        # x-values to try out
        indep = numpy.arange(numX, dtype=numpy.float64)
        indep = indep * deltaX + offsetX

        # y-values
        # test different type
        depend = numpy.arange(numX, dtype=numpy.int32)
        self.assertRaises(ValueError, Stats.getMomentsAboutOrigin, indep,
                          depend)

        # now correct y values
        weightedDiff = (indep - mean) / sigma
        depend = numpy.exp(-0.5 * weightedDiff *
                           weightedDiff) / sigma / math.sqrt(2. * math.pi)

        aboutOrigin = Stats.getMomentsAboutOrigin(indep, depend)
        self.assertIsInstance(aboutOrigin, numpy.ndarray)
        self.assertEqual(4, aboutOrigin.shape[0])
        self.assertAlmostEqual(1., aboutOrigin[0], places=4)
        self.assertAlmostEqual(mean, aboutOrigin[1], places=4)
        self.assertLess(math.fabs(expVar - aboutOrigin[2]), 0.001 * expVar)
        self.assertLess(math.fabs(expSkew - aboutOrigin[3]), 0.001 * expSkew)

        aboutMean = Stats.getMomentsAboutMean(indep, depend)
        # Type and shape are checked on aboutMean itself; these two checks
        # previously re-tested aboutOrigin by mistake.
        self.assertIsInstance(aboutMean, numpy.ndarray)
        self.assertEqual(4, aboutMean.shape[0])
        self.assertAlmostEqual(1., aboutMean[0], places=4)
        self.assertAlmostEqual(0., aboutMean[1], places=4)
        self.assertLess(math.fabs(sigma * sigma - aboutMean[2]),
                        0.001 * expVar)
        self.assertLess(math.fabs(0. - aboutMean[3]), 0.0001 * expSkew)
Esempio n. 10
0
    def test_getMoments(self):
        """Check moments of a sampled Gaussian about the origin and the mean.

        A Gaussian with mean 5 and sigma 4 is sampled on 200 points spaced
        0.2 apart and centred on the mean. An int32 y-array must be rejected
        with ValueError; for the float y-array the first four moments are
        compared against their analytic values.
        """
        mean = 5.
        sigma = 4.
        deltaX = .2
        numX = 200
        # calculate to have same number of points left and right of function
        offsetX = mean - (.5 * deltaX * float(numX))
        # variance about origin
        expVar = mean * mean + sigma * sigma
        # skew about origin
        expSkew = mean * mean * mean + 3. * mean * sigma * sigma

        # x-values to try out
        indep = numpy.arange(numX, dtype=numpy.float64)
        indep = indep * deltaX + offsetX

        # y-values
        # test different type
        depend = numpy.arange(numX, dtype=numpy.int32)
        self.assertRaises(ValueError, Stats.getMomentsAboutOrigin, indep, depend)

        # now correct y values
        weightedDiff = (indep - mean) / sigma
        depend = numpy.exp(-0.5 * weightedDiff * weightedDiff) / sigma / math.sqrt(2. * math.pi)

        aboutOrigin = Stats.getMomentsAboutOrigin(indep, depend)
        self.assertIsInstance(aboutOrigin, numpy.ndarray)
        self.assertEqual(4, aboutOrigin.shape[0])
        self.assertAlmostEqual(1., aboutOrigin[0], places=4)
        self.assertAlmostEqual(mean, aboutOrigin[1], places=4)
        self.assertLess(math.fabs(expVar - aboutOrigin[2]), 0.001 * expVar)
        self.assertLess(math.fabs(expSkew - aboutOrigin[3]), 0.001 * expSkew)

        aboutMean = Stats.getMomentsAboutMean(indep, depend)
        # Type and shape are checked on aboutMean itself; these two checks
        # previously re-tested aboutOrigin by mistake.
        self.assertIsInstance(aboutMean, numpy.ndarray)
        self.assertEqual(4, aboutMean.shape[0])
        self.assertAlmostEqual(1., aboutMean[0], places=4)
        self.assertAlmostEqual(0., aboutMean[1], places=4)
        self.assertLess(math.fabs(sigma * sigma - aboutMean[2]), 0.001 * expVar)
        self.assertLess(math.fabs(0. - aboutMean[3]), 0.0001 * expSkew)
    def PyExec(self):
        """Build a table of summary statistics for the selected columns.

        Reads the workspace named by 'InputWorkspace' and the comma-separated
        'ColumnIndices' property (all columns when it is empty). Creates an
        empty table named by 'OutputWorkspace' with a leading 'Statistic'
        string column, then adds one float column per selected column that
        could be processed. A column whose processing raises is reported via
        ``logger.notice`` and skipped. One row is added per statistic and the
        table is set as the 'OutputWorkspace' property.
        """
        in_ws = mtd[self.getPropertyValue('InputWorkspace')]
        indices_list = self.getPropertyValue('ColumnIndices')
        out_ws_name = self.getPropertyValue('OutputWorkspace')
        column_names = in_ws.getColumnNames()

        # If column indices are not provided, then default to _ALL_ columns
        if indices_list:
            indices_list = [int(x) for x in indices_list.split(',')]
        else:
            indices_list = range(len(column_names))

        out_ws = ms.CreateEmptyTableWorkspace(OutputWorkspace=out_ws_name)

        out_ws.addColumn('str', 'Statistic')

        # statistic name -> {column name -> value}; ordered so that rows and
        # columns come out in a stable order.
        stats = collections.OrderedDict([
            ('StandardDev', collections.OrderedDict()),
            ('Minimum', collections.OrderedDict()),
            ('Median', collections.OrderedDict()),
            ('Maximum', collections.OrderedDict()),
            ('Mean', collections.OrderedDict()),
        ])

        for index in indices_list:
            column_name = column_names[index]
            try:
                column_data = np.array([float(v) for v in in_ws.column(index)])
                col_stats = _stats_to_dict(Stats.getStatistics(column_data))
                # Collect every value first so a failure cannot leave `stats`
                # half-filled for a column that never reaches the output.
                column_values = [col_stats[stat_name] for stat_name in stats]
                out_ws.addColumn('float', column_name)
            except Exception:
                # Still best-effort, but unlike the previous bare `except:`
                # this lets KeyboardInterrupt and SystemExit propagate.
                logger.notice('Column \'%s\' is not numerical, skipping' %
                              column_name)
                continue

            for stat_name, value in zip(stats, column_values):
                stats[stat_name][column_name] = value

        # One output row per statistic; the 'Statistic' cell labels the row.
        for stat_name, column_stats in iteritems(stats):
            row = collections.OrderedDict(column_stats)
            row['Statistic'] = stat_name
            out_ws.addRow(row)

        self.setProperty('OutputWorkspace', out_ws)
    def PyExec(self):
        """Build a table of summary statistics for the selected columns.

        Reads the workspace named by 'InputWorkspace' and the comma-separated
        'ColumnIndices' property (all columns when it is empty). Creates an
        empty table named by 'OutputWorkspace' with a leading 'Statistic'
        string column, then adds one float column per selected column that
        could be processed. A column whose processing raises is reported via
        ``logger.notice`` and skipped. One row is added per statistic and the
        table is set as the 'OutputWorkspace' property.
        """
        in_ws = mtd[self.getPropertyValue('InputWorkspace')]
        indices_list = self.getPropertyValue('ColumnIndices')
        out_ws_name = self.getPropertyValue('OutputWorkspace')
        column_names = in_ws.getColumnNames()

        # If column indices are not provided, then default to _ALL_ columns
        if indices_list:
            indices_list = [int(x) for x in indices_list.split(',')]
        else:
            indices_list = range(len(column_names))

        out_ws = ms.CreateEmptyTableWorkspace(OutputWorkspace=out_ws_name)

        out_ws.addColumn('str', 'Statistic')

        # statistic name -> {column name -> value}; ordered so that rows and
        # columns come out in a stable order.
        stats = collections.OrderedDict([
            ('StandardDev', collections.OrderedDict()),
            ('Minimum', collections.OrderedDict()),
            ('Median', collections.OrderedDict()),
            ('Maximum', collections.OrderedDict()),
            ('Mean', collections.OrderedDict()),
        ])

        for index in indices_list:
            column_name = column_names[index]
            try:
                column_data = np.array([float(v) for v in in_ws.column(index)])
                col_stats = _stats_to_dict(Stats.getStatistics(column_data))
                # Collect every value first so a failure cannot leave `stats`
                # half-filled for a column that never reaches the output.
                column_values = [col_stats[stat_name] for stat_name in stats]
                out_ws.addColumn('float', column_name)
            except Exception:
                # Still best-effort, but unlike the previous bare `except:`
                # this lets KeyboardInterrupt and SystemExit propagate.
                logger.notice('Column \'%s\' is not numerical, skipping' % column_name)
                continue

            for stat_name, value in zip(stats, column_values):
                stats[stat_name][column_name] = value

        # One output row per statistic; the 'Statistic' cell labels the row.
        for stat_name, column_stats in iteritems(stats):
            row = collections.OrderedDict(column_stats)
            row['Statistic'] = stat_name
            out_ws.addRow(row)

        self.setProperty('OutputWorkspace', out_ws)