Esempio n. 1
0
def smartReadMat(filename):
    """Read a matrix from a file, determining the file type automatically.

    The file type is chosen from the extension of ``filename``.  The
    following formats are currently supported:

    - '.amat' : PLearn ascii matrix format (delegated to ``readAMat``)
    - '.pmat' : PLearn binary matrix format (read through ``PMat``)
    - '.csv'  : Text-file comma-separated values format

    For '.csv' files, ``csv.Sniffer`` inspects the first 1000 characters to
    decide whether the first row is a header.  When no header is detected,
    the names 'field1', 'field2', ... are generated, one per column.  Every
    CSV cell is converted with ``float``; completely blank lines are skipped.

    Parameters:
        filename: path of the file to read; its extension selects the reader.

    Returns:
        A pair ``(matrix, fieldnames)``.

    Raises:
        ValueError: if the extension is not one of the supported ones, or if
            a CSV cell cannot be converted to a float.
    """
    if filename.endswith(".amat"):
        arr, fieldnames = readAMat(filename)

    elif filename.endswith(".pmat"):
        pmat = PMat(filename)
        try:
            arr = pmat.getRows(0, pmat.length)
            fieldnames = pmat.fieldnames
        finally:
            # Close the matrix file even if reading the rows fails.
            pmat.close()

    elif filename.endswith(".csv"):
        # The context manager closes the file even when sniffing or the
        # float conversion raises.
        with open(filename) as f:
            # Use CSV sniffer to detect presence of header.
            has_header = csv.Sniffer().has_header(f.read(1000))
            f.seek(0)

            # Load csv into array
            csv_reader = csv.reader(f)
            if has_header:
                # The next() builtin works on both Python 2.6+ and Python 3.
                fieldnames = next(csv_reader)

            # csv.reader yields an empty list for a blank line; dropping
            # those keeps the rows rectangular.
            rows = [[float(value) for value in fields]
                    for fields in csv_reader if fields]
            # numpy.array replaces the legacy numpy.numarray compatibility
            # layer, which newer NumPy releases no longer ship.
            arr = numpy.array(rows, dtype=float)

        if not has_header:
            # Generate fake fieldnames
            fieldnames = ['field%d' % (i + 1) for i in range(arr.shape[1])]

    else:
        # Call syntax for raise is valid on both Python 2 and Python 3.
        raise ValueError(
            "Unrecognized file type for '%s'; valid extensions are: "
            "{'.amat', '.pmat', '.csv'}" % filename)

    return arr, fieldnames
Esempio n. 2
0
        
        m = array([[stats[k][i] for i in range(self.width())] for k in sk])

        _printMatrix(m, sk, self.fieldnames, os, pretty)
        
        print "\nCovariance Matrix:"
        _printMatrix(stats["COV"], self.fieldnames, self.fieldnames, os, pretty)
        print "\nCorrelation Matrix:"
        _printMatrix(stats["CORR"], self.fieldnames, self.fieldnames, os, pretty)


if __name__ == "__main__":
    # Demo run: collect statistics over a sample matrix located under
    # PLEARNDIR, feed the same data a second time, then forget.
    from plearn.vmat.readAMat import readAMat

    sample_path = os.path.join(
        ppath.ppath('PLEARNDIR'),
        'examples', 'data', 'test_suite',
        'top_100_test.amat'
        )
    ut, fieldnames = readAMat(sample_path)

    sc = StatsCollector(fieldnames)
    sc.update(ut)
    sc.printStats(sys.stdout, False)

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("\nAfter accumulating some more:")
    sc.update(ut)
    sc.printStats(sys.stdout, False)

    print("\nAfter forgetting:")
    sc.forget(fieldnames)
    sc.printStats(sys.stdout, False)
Esempio n. 3
0
            "N", "NMISSING", "NNONMISSING", "E", "V", "STDDEV", "STDERR",
            "SUM", "SUMSQ", "MIN", "ARGMIN", "MAX", "ARGMAX"
        ]

        m = array([[stats[k][i] for i in range(self.width())] for k in sk])

        _printMatrix(m, sk, self.fieldnames, os, pretty)

        print "\nCovariance Matrix:"
        _printMatrix(stats["COV"], self.fieldnames, self.fieldnames, os,
                     pretty)
        print "\nCorrelation Matrix:"
        _printMatrix(stats["CORR"], self.fieldnames, self.fieldnames, os,
                     pretty)


if __name__ == "__main__":
    # Demo run: collect statistics over a sample matrix located under
    # PLEARNDIR, feed the same data a second time, then forget.
    from plearn.vmat.readAMat import readAMat

    sample_path = os.path.join(ppath.ppath('PLEARNDIR'), 'examples', 'data',
                               'test_suite', 'top_100_test.amat')
    ut, fieldnames = readAMat(sample_path)

    sc = StatsCollector(fieldnames)
    sc.update(ut)
    sc.printStats(sys.stdout, False)

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("\nAfter accumulating some more:")
    sc.update(ut)
    sc.printStats(sys.stdout, False)

    print("\nAfter forgetting:")
    sc.forget(fieldnames)
    sc.printStats(sys.stdout, False)