def burt_table(data, attributes):
    """ Construct a Burt table (all values cross-tabulation) from data for attributes.

    Return an ordered list of (attribute, value) pairs and a numpy.ndarray with the tabulations.
    Row/column ``k`` of the array corresponds to the ``k``-th (attribute, value)
    pair of the returned list. The block for a pair of attributes is filled
    from their contingency matrix and mirrored (transposed) across the
    diagonal; the diagonal blocks hold each attribute tabulated against itself.

    :param data: Data table.
    :type data: :class:`Orange.data.Table`

    :param attributes: List of attributes (must be Discrete).
    :type attributes: list

    :returns: ``(values, table)`` where ``values`` is the list of
        (attribute, value) pairs and ``table`` is a square float array with
        one row and one column per pair. An empty ``attributes`` list yields
        ``([], <0x0 array>)``.

    Example ::

        >>> data = Orange.data.Table("smokers_ct")
        >>> items, counts = burt_table(data, [data.domain["Staff group"], data.domain["Smoking category"]])

    """
    values = [(attr, value) for attr in attributes for value in attr.values]
    table = numpy.zeros((len(values), len(values)))
    counts = [len(attr.values) for attr in attributes]

    # Half-open [start, end) index span occupied by each attribute's values,
    # accumulated in one pass.
    spans = []
    start = 0
    for count in counts:
        spans.append((start, start + count))
        start += count

    for i, attr1 in enumerate(attributes):
        start1, end1 = spans[i]
        # Only the lower triangle (j <= i) is computed; the upper one is the
        # transpose of the same contingency matrix.
        for j in range(i + 1):
            attr2 = attributes[j]
            start2, end2 = spans[j]

            cm = contingency.VarVar(attr1, attr2, data)
            # Rows of the contingency object are converted to plain lists so
            # numpy builds a regular 2-D array (attr1 values x attr2 values).
            cm = numpy.array([list(row) for row in cm])

            table[start1:end1, start2:end2] += cm
            if i != j:  # also fill the upper part
                table[start2:end2, start1:end1] += cm.T

    return values, table
    ##    [95,    58,    66,    84,    41],
    ##    [80,    73,    83,     4 ,   96],
    ##    [79,    93,    35,    73,    63]])
    ##
    ##    data = [[9, 11, 4],
    ##                [ 3,          5,          3],
    ##                [     11,          6,          3],
    ##                [24,         73,         48]]

    # NOTE(review): as laid out in this file these statements follow a
    # `return` at function-body indentation, so they never execute; they look
    # like a demo meant for a script entry point -- confirm and relocate.

    # Author punctuation (from 'Correspondence Analysis - Herve Abdi Lynne J. Williams')
    # Six rows (one per author listed below) by three columns
    # (period, comma, other), matching the two label lists passed to CA.
    data = [[7836, 13112, 6026], [53655, 102383, 42413],
            [115615, 184541, 59226], [161926, 340479, 62754],
            [38177, 105101, 12670], [46371, 58367, 14299]]

    # Run correspondence analysis on the raw count matrix; the second and
    # third arguments are presumably the row and column labels (their lengths
    # match the 6x3 shape of `data`).
    c = CA(
        data,
        ["Rousseau", "Chateaubriand", "Hugo", "Zola", "Proust", "Giraudoux"],
        ["period", "comma", "other"])
    c.plot_scree_diagram()
    c.plot_biplot()

    # Second demo: the same analysis on a contingency matrix computed from
    # the smokers_ct data set (path is relative to the working directory).
    import Orange
    data = Orange.data.Table("../../doc/datasets/smokers_ct")
    staff = data.domain["Staff group"]
    smoking = data.domain["Smoking category"]
    cont = contingency.VarVar(staff, smoking, data)

    # Label with the values of the two cross-tabulated variables.
    c = CA(cont, staff.values, smoking.values)
    c.plot_scree_diagram()
    c.plot_biplot()
# Example #3
# 0
# Description: Demonstrates the use of correspondence analysis
# Category:    correspondence, projection
# Classes:     CA
# Uses:        bridges.tab

import Orange
import Orange.projection.correspondence as corr
import Orange.statistics.contingency as cont

# Load the "bridges" data set and cross-tabulate PURPOSE against MATERIAL.
bridges = Orange.data.Table("bridges")
cm = cont.VarVar("PURPOSE", "MATERIAL", bridges)
# Run correspondence analysis on the resulting contingency matrix.
ca = corr.CA(cm)


def report(coors, labels):
    """Print one line per label with its first two coordinates.

    :param coors: Iterable of coordinate rows; each row is indexed as
        ``coor[0, 0]`` and ``coor[0, 1]`` (i.e. a 1 x N two-dimensional row).
    :param labels: Iterable of string labels, paired positionally with
        ``coors``. Pairing stops at the shorter of the two.
    """
    for coor, label in zip(coors, labels):
        # Parenthesised single-argument print behaves identically on
        # Python 2 (print statement) and Python 3 (print function).
        print("  %-10s (%.3f, %.3f)" % (label + ":", coor[0, 0], coor[0, 1]))


# The contingency matrix was built as VarVar("PURPOSE", "MATERIAL", ...), so
# PURPOSE indexes its rows and MATERIAL its columns. Each section therefore
# pairs the matching factors with the matching variable's value labels.
# NOTE(review): row/column orientation inferred from how VarVar results are
# used elsewhere in this file -- confirm against the CA documentation.
print("PURPOSE")
report(ca.row_factors(), bridges.domain["PURPOSE"].values)
print("")

print("MATERIAL")
report(ca.column_factors(), bridges.domain["MATERIAL"].values)
print("")