def burt_table(data, attributes):
    """
    Construct a Burt table (all values cross-tabulation) from data for
    attributes.

    Return an ordered list of (attribute, value) pairs and a numpy.ndarray
    with the tabulations. The i-th row and the i-th column of the array both
    correspond to the i-th (attribute, value) pair of the list.

    :param data: Data table.
    :type data: :class:`Orange.data.Table`

    :param attributes: List of attributes (must be Discrete).
    :type attributes: list

    :returns: ``(items, table)`` where ``items`` is the list of
        (attribute, value) pairs and ``table`` is a square
        ``numpy.ndarray`` with ``len(items)`` rows and columns.

    Example ::

        >>> data = Orange.data.Table("smokers_ct")
        >>> items, counts = burt_table(data, [data.domain["Staff group"], data.domain["Smoking category"]])

    """
    values = [(attr, value) for attr in attributes for value in attr.values]

    # Each attribute owns a contiguous band of rows/columns, one per value.
    # Offsets are accumulated in a single pass.
    counts = [len(attr.values) for attr in attributes]
    offsets = []
    total = 0
    for count in counts:
        offsets.append(total)
        total += count

    table = numpy.zeros((total, total))

    for i, attr1 in enumerate(attributes):
        start1, end1 = offsets[i], offsets[i] + counts[i]
        # Only pairs with j <= i are tabulated; the other half of the table
        # is filled with the transposed block.
        for j in range(i + 1):
            attr2 = attributes[j]
            start2, end2 = offsets[j], offsets[j] + counts[j]

            cm = contingency.VarVar(attr1, attr2, data)
            cm = numpy.array([list(row) for row in cm])

            table[start1:end1, start2:end2] += cm
            if i != j:
                # also fill the upper part
                table[start2:end2, start1:end1] += cm.T

    return values, table
## [95, 58, 66, 84, 41],
## [80, 73, 83, 4 , 96],
## [79, 93, 35, 73, 63]])
##
## data = [[9, 11, 4],
## [ 3, 5, 3],
## [ 11, 6, 3],
## [24, 73, 48]]

# Author punctuation (from 'Correspondence Analysis - Herve Abdi Lynne J. Williams')
# Six rows and three columns, matching the six names and three mark kinds
# passed to CA below (presumably as row and column labels -- confirm
# against the CA constructor).
data = [
    [7836, 13112, 6026],
    [53655, 102383, 42413],
    [115615, 184541, 59226],
    [161926, 340479, 62754],
    [38177, 105101, 12670],
    [46371, 58367, 14299],
]

c = CA(
    data,
    ["Rousseau", "Chateaubriand", "Hugo", "Zola", "Proust", "Giraudoux"],
    ["period", "comma", "other"],
)
c.plot_scree_diagram()
c.plot_biplot()

# Second run: build the contingency table from an Orange data set and use
# the value names of the two variables as labels.
import Orange

data = Orange.data.Table("../../doc/datasets/smokers_ct")
staff, smoking = data.domain["Staff group"], data.domain["Smoking category"]
cont = contingency.VarVar(staff, smoking, data)

c = CA(cont, staff.values, smoking.values)
c.plot_scree_diagram()
c.plot_biplot()
# Description: Demonstrates the use of correspondence analysis
# Category: correspondence, projection
# Classes: CA
# Uses: bridges.tab

import Orange
import Orange.projection.correspondence as corr
import Orange.statistics.contingency as cont

bridges = Orange.data.Table("bridges")
# PURPOSE is the outer variable (rows of the table), MATERIAL the inner
# one (columns).
cm = cont.VarVar("PURPOSE", "MATERIAL", bridges)
ca = corr.CA(cm)


def report(coors, labels):
    """Print one line per (coordinates, label) pair.

    Each item of ``coors`` is indexed as ``coor[0, 0]`` and ``coor[0, 1]``
    to obtain the two coordinates that are shown; ``labels`` supplies the
    matching names. Pairs are formed with ``zip``, so surplus items on
    either side are ignored.
    """
    for coor, label in zip(coors, labels):
        # Single-argument parenthesised print: same output on Python 2,
        # and still valid syntax on Python 3.
        print(" %-10s (%.3f, %.3f)" % (label + ":", coor[0, 0], coor[0, 1]))


# Row factors belong to the outer variable, so they are labelled with the
# PURPOSE values.
print("PURPOSE")
report(ca.row_factors(), bridges.domain["PURPOSE"].values)
print("")

# Column factors belong to the inner variable, so they are labelled with
# the MATERIAL values (previously PURPOSE values were used here).
print("MATERIAL")
report(ca.column_factors(), bridges.domain["MATERIAL"].values)
print("")