Ejemplo n.º 1
0
def test_statistics_module():
    """Testing all public methods in scidblib.statistics.

    Every statistic is computed for the fixed sample [3, 3, 4, 8], checked
    against its known value, and echoed to stdout.

    Output is written with single-argument ``print(...)`` calls.  That form
    parses and prints the same text whether ``print`` is a statement
    (Python 2) or a function (Python 3); the statement-only spelling was a
    SyntaxError on Python 3.

    Raises AssertionError if any statistic has an unexpected value.
    """
    print('*** testing scidblib.statistics...')
    data = [3, 3, 4, 8]

    # Irrational results are compared after rounding to 10 decimals.
    a = statistics.pstdev(data)
    assert round(a, 10) == 2.0615528128
    print('pstdev = %s' % a)

    a = statistics.pvariance(data)
    assert a == 4.25
    print('pvariance = %s' % a)

    a = statistics.stdev(data)
    assert round(a, 10) == 2.3804761428
    print('stdev = %s' % a)

    a = statistics.variance(data)
    assert round(a, 10) == 5.6666666667
    print('variance = %s' % a)

    a = statistics.median(data)
    assert a == 3.5
    print('median = %s' % a)

    a = statistics.median_low(data)
    assert a == 3
    print('median_low = %s' % a)

    a = statistics.median_high(data)
    assert a == 4
    print('median_high = %s' % a)

    a = statistics.median_grouped(data)
    assert a == 3.5
    print('median_grouped = %s' % a)

    a = statistics.mean(data)
    assert a == 4.5
    print('mean = %s' % a)

    a = statistics.mode(data)
    assert a == 3
    print('mode = %s' % a)
    # Trailing blank line; a bare `print` would be a no-op on Python 3.
    print('')
Ejemplo n.º 2
0
def test_statistics_module():
    """Testing all public methods in scidblib.statistics.

    Every statistic is computed for the fixed sample [3, 3, 4, 8], checked
    against its known value, and echoed to stdout.

    Output is written with single-argument ``print(...)`` calls.  That form
    parses and prints the same text whether ``print`` is a statement
    (Python 2) or a function (Python 3); the statement-only spelling was a
    SyntaxError on Python 3.

    Raises AssertionError if any statistic has an unexpected value.
    """
    print('*** testing scidblib.statistics...')
    data = [3, 3, 4, 8]

    # Irrational results are compared after rounding to 10 decimals.
    a = statistics.pstdev(data)
    assert round(a, 10) == 2.0615528128
    print('pstdev = %s' % a)

    a = statistics.pvariance(data)
    assert a == 4.25
    print('pvariance = %s' % a)

    a = statistics.stdev(data)
    assert round(a, 10) == 2.3804761428
    print('stdev = %s' % a)

    a = statistics.variance(data)
    assert round(a, 10) == 5.6666666667
    print('variance = %s' % a)

    a = statistics.median(data)
    assert a == 3.5
    print('median = %s' % a)

    a = statistics.median_low(data)
    assert a == 3
    print('median_low = %s' % a)

    a = statistics.median_high(data)
    assert a == 4
    print('median_high = %s' % a)

    a = statistics.median_grouped(data)
    assert a == 3.5
    print('median_grouped = %s' % a)

    a = statistics.mean(data)
    assert a == 4.5
    print('mean = %s' % a)

    a = statistics.mode(data)
    assert a == 3
    print('mode = %s' % a)
    # Trailing blank line; a bare `print` would be a no-op on Python 3.
    print('')
Ejemplo n.º 3
0
def get_chunk_statistics(array_name, _arrays=[], _chdescs=[]):
    """Derive chunk statistics for the given array.

    array_name is either a bare array name, in which case the highest
    version found in the array listing is used, or an explicit
    'name@version'.

    _arrays and _chdescs are intentionally mutable defaults: they act as a
    cache of the two list() queries and are filled in place the first time
    they are found empty.  Callers normally do not pass them.

    Returns a list with one entry per attribute, the empty bitmap being
    counted as one extra attribute.  Each entry is a ChunkStats object, or,
    when _PPRINT_FRIENDLY is set, a plain list of labelled values.

    Raises AppError if no matching array is listed, or if the attributes
    do not all have the same number of chunks.
    """

    # Read and cache results of these list() queries.  The cache lists have
    # to be filled in place: assigning to the parameter name only rebinds
    # the local variable, so the next call would run the query again.
    if not _arrays:
        rows = list(make_table('array', "list('arrays', true)"))
        if isinstance(_arrays, list):
            _arrays.extend(rows)
        else:
            # Caller passed some other empty/None value; use a local copy.
            _arrays = rows
    if not _chdescs:
        want = "arrid,attid,nelem,csize,usize,asize"
        rows = list(make_table('chdesc',
                               "project(list('chunk descriptors'),%s)" % want))
        if isinstance(_chdescs, list):
            _chdescs.extend(rows)
        else:
            _chdescs = rows

    # Find the versioned array id (vaid) of the given array name.
    if '@' in array_name:
        aname, ver = array_name.split('@')
        ver = int(ver)
        pairs = [(ver, int(x.aid)) for x in _arrays if x.name == array_name]
    else:
        aname = array_name
        pairs = [(int(x.name[x.name.index('@') + 1:]), int(x.aid))
                 for x in _arrays if x.name.startswith("%s@" % array_name)]
    if not pairs:
        raise AppError("Array '{0}' not found".format(array_name))
    # Pairs sort by version number first, so the last one is the newest.
    vaid = sorted(pairs)[-1][1]
    dbg("Versioned array id for", array_name, "is", vaid)

    # Use vaid to collect chunk data from the chunkdesc table.
    attr_table = make_table('attrs', "attributes(%s)" % aname)
    attr2name = {}
    attr2chunks = {}
    for x in attr_table:
        attr2name[int(x.No)] = x.name
        attr2chunks[int(x.No)] = []
    # The empty bitmap is not in the attributes() result; it is given the
    # next attribute number after the listed ones.
    ebm = len(attr2name)
    attr2name[ebm] = "emptyBitmap"
    attr2chunks[ebm] = []
    nchunks = 0
    chunks_per_attr = Counter()
    for cdesc in _chdescs:
        if int(cdesc.arrid) == vaid:
            aid = int(cdesc.attid)
            nchunks += 1
            chunks_per_attr[aid] += 1
            # Appended list *MUST* be in same order as CHUNK_STAT_TUPLES.
            # A real list (not a lazy map object) is needed because the
            # rows are indexed by position further down.
            attr2chunks[aid].append(
                [int(v) for v in (cdesc.nelem, cdesc.csize,
                                  cdesc.usize, cdesc.asize)])

    # Paranoid check that all attributes have same number of chunks.
    expected_cpa = nchunks // len(attr2name)
    complaints = []
    for x in sorted(chunks_per_attr):
        if chunks_per_attr[x] != expected_cpa:
            complaints.append("Attribute {0} has unexpected chunk count {1},"
                              " should be {2}".format(x, chunks_per_attr[x],
                                                      expected_cpa))
    if complaints:
        raise AppError('\n'.join(complaints))

    # Build ChunkStats objects, one per attribute, in attribute-number
    # order so the result does not depend on dict iteration order.
    # NOTE(review): if StatLib follows the Python 3 `statistics` API, mean()
    # raises on empty data and stdev() on fewer than two values, and data[0]
    # raises IndexError on empty data -- confirm how arrays with zero or one
    # chunk per attribute are meant to be reported.
    all_chunk_stats = []
    for aid in sorted(attr2name):
        if _PPRINT_FRIENDLY:
            stats = [
                array_name, (attr2name[aid], aid),
                ('chunks_per_attr', chunks_per_attr[aid])
            ]
        else:
            stats = [array_name, attr2name[aid], aid, chunks_per_attr[aid]]
        for i, datum in enumerate(CHUNK_STAT_TUPLES):
            # Sorted so that data[0] / data[-1] are the minimum / maximum.
            data = sorted([x[i] for x in attr2chunks[aid]])
            stat = Stat(StatLib.mean(data), StatLib.stdev(data), data[0],
                        StatLib.median(data), data[-1])
            if _PPRINT_FRIENDLY:
                stats.append((datum, stat))
            else:
                stats.append(stat)
        if _PPRINT_FRIENDLY:
            all_chunk_stats.append(stats)
        else:
            all_chunk_stats.append(ChunkStats(*stats))

    # Done!
    return all_chunk_stats
Ejemplo n.º 4
0
def safe_stdev(x):
    """Return statistics.stdev(x), or a placeholder string for tiny inputs.

    A falsy x (for example an empty list or None) yields "(no data)"; an x
    of length one yields "(one datum: <value>)".  Anything else is passed
    to statistics.stdev and that result is returned unchanged.
    """
    if x:
        if len(x) != 1:
            return statistics.stdev(x)
        return "(one datum: {0})".format(x[0])
    return "(no data)"