Ejemplo n.º 1
def printRecArray(recarr, precision=8):
    """
    Print a record array in a mannerly fashion.

    The array is formatted by ``mlab.rec2txt`` and written to standard
    output.

    @param recarr: The record array.
    @type recarr: recarray
    @keyword precision: The precision of the floats shown when printed
                        (default: 8)
    @type precision: int
    """
    # The parenthesised form is valid both as a Python 2 print statement
    # and as a Python 3 function call.
    print(mlab.rec2txt(recarr, precision=precision))
Ejemplo n.º 2
def printRecArray(recarr, precision=8):
    """
    Print a record array in a mannerly fashion.

    Formatting is delegated to ``mlab.rec2txt``; the resulting text goes
    to standard output.

    @param recarr: The record array.
    @type recarr: recarray
    @keyword precision: The precision of the floats shown when printed
                        (default: 8)
    @type precision: int
    """
    text = mlab.rec2txt(recarr, precision=precision)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(text)
Ejemplo n.º 3
 def __str__(self):
     """
     Return the array as a text table, or ``'[]'`` when it is empty.

     The table is produced by ``rec2txt`` with a padding of 3 and a float
     precision of 4.  An array without field names or without elements
     has nothing to tabulate and is shown as ``'[]'``.
     """
     if self.names and self.size:
         return rec2txt(self, padding=3, precision=4)
     # Previously unreachable: this line sat directly after the return
     # above inside the same branch.
     return '[]'
Ejemplo n.º 4
 def printResult(self):
     """
     Build a text report of the optimization result.

     The report contains the optimization mode, a table (rendered by
     ``rec2txt``) pairing each entry of ``self.nameList`` with the
     matching entry of ``self.optimizedWeights``, and the mean, optimized
     risk, targeted risk and mean/risk ratio (labelled "Sharpe").

     Despite the name, nothing is printed: the report is returned.

     @return: The formatted report.
     @rtype: str
     """
     # zip() is a one-shot iterator on Python 3; np.array() needs a real
     # sequence of tuples to build the structured array.
     weight_data = np.array(
         list(zip(self.nameList, self.optimizedWeights)),
         dtype=[('Name', 'S20'), ('Weight', float)])
     parts = [
         "\n",
         "MeanVariance Optimization Result\n",
         "Optimization Mode  : %s\n" % self.optimizationMode,
         "\n",
         "Optimization Result:\n",
         rec2txt(weight_data),
         "\n",
         "Optimization Metric:\n",
         "                 Mean:  %f\n" % self.optimizedMean,
         "      (optimized)Risk:  %f\n" % self.optimizedRisk,
         "       (targeted)Risk:  %f\n" % self.targetRisk,
         "               Sharpe:  %f\n" % (self.optimizedMean / self.optimizedRisk),
         "\n",
     ]
     return "".join(parts)
Ejemplo n.º 5
 def __str__(self):
     """
     Render the table together with its metadata.

     Up to three sections are joined with CRLF line breaks:

     - a ``globalmeta:`` listing, when ``self.globalmeta`` is non-empty;
     - a ``fieldmeta:`` listing pairing each field name with its entry in
       ``self.fieldmeta``, when at least one entry is truthy;
     - the records themselves.
     """
     str_ = []
     if self.globalmeta:
         g = '\r\n'.join(
             ['globalmeta:'] +
             ['\t{0}:{1}'.format(k, v) for k, v in self.globalmeta.items()])
         # Previously built and then discarded.
         str_.append(g)
     if any(bool(fmeta) for fmeta in self.fieldmeta):
         fieldmeta = zip(self.names, self.fieldmeta)
         f = '\r\n'.join(['fieldmeta:'] +
                         ['\t{0}:{1}'.format(n, m) for n, m in fieldmeta])
         str_.append(f)
     # NOTE(review): the original assigned the rec2txt rendering and then
     # immediately overwrote it with the parent's __str__; this is read as
     # a lost "try rec2txt, fall back to the parent" construct -- confirm.
     try:
         d = rec2txt(self.view(recarray), padding=3, precision=4)
     except Exception:
         d = super(LookUpTable, self).__str__()
     str_.append(d)
     return '\r\n'.join(str_)
Ejemplo n.º 6
from __future__ import print_function
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.cbook as cbook

# Demonstrate mlab.rec_join on two overlapping slices of the AAPL sample.
datafile = cbook.get_sample_data('aapl.csv', asfileobj=False)
print('loading', datafile)
r = mlab.csv2rec(datafile)

# Left-hand table: the last ten rows.
r1 = r[-10:]

# Create a new array: twelve rows taken from r[-17:-5], so it shares
# rows -10..-6 with r1 and carries an extra 'marker' column.
# 'O' is the portable spelling of the object dtype ('|O4' pins the item
# size to 4 bytes); the builtin float replaces the removed np.float alias.
r2 = np.empty(12, dtype=[('date', 'O'), ('high', float),
                         ('marker', float)])
r2 = r2.view(np.recarray)
r2.date = r.date[-17:-5]
r2.high = r.high[-17:-5]
r2.marker = np.arange(12)


# Passed to rec_join as its ``defaults`` argument.
# np.nan is the spelling that exists in every NumPy release (np.NaN was
# removed in NumPy 2.0).
defaults = {'marker': -1, 'close': np.nan, 'low': -4444.}

for s in ('inner', 'outer', 'leftouter'):
    rec = mlab.rec_join(['date', 'high'], r1, r2,
                        jointype=s, defaults=defaults)
    print("\n%sjoin :\n%s" % (s, mlab.rec2txt(rec)))
Ejemplo n.º 7
rsum = mlab.rec_summarize(r, summaryfuncs)

# stats is a sequence of (dtype_name, function, output_dtype_name).
# rec_groupby will summarize the attribute identified by the
# dtype_name over the groups in the groupby list, and assign the
# result to the output_dtype_name
stats = (
    ('dreturn', len, 'rcnt'),
    ('dreturn', np.mean, 'rmean'),
    ('dreturn', np.median, 'rmedian'),
    ('dreturn', np.std, 'rsigma'),
)  # closing parenthesis was missing

# you can summarize over a single variable, like years or months
# (single-argument print(...) is valid on both Python 2 and Python 3)
print('summary by years')
ry = mlab.rec_groupby(rsum, ('years', ), stats)
print(mlab.rec2txt(ry))

print('summary by months')
rm = mlab.rec_groupby(rsum, ('months', ), stats)
print(mlab.rec2txt(rm))

# or over multiple variables like years and months
print('summary by year and month')
rym = mlab.rec_groupby(rsum, ('years', 'months'), stats)
print(mlab.rec2txt(rym))

print('summary by volume')
rv = mlab.rec_groupby(rsum, ('volcode', ), stats)
print(mlab.rec2txt(rv))
Ejemplo n.º 8
rsum = mlab.rec_summarize(r, summaryfuncs)

# stats is a sequence of (dtype_name, function, output_dtype_name).
# rec_groupby will summarize the attribute identified by the
# dtype_name over the groups in the groupby list, and assign the
# result to the output_dtype_name
stats = (
    ('dreturn', len, 'rcnt'),
    ('dreturn', np.mean, 'rmean'),
    ('dreturn', np.median, 'rmedian'),
    ('dreturn', np.std, 'rsigma'),
)  # closing parenthesis was missing

# you can summarize over a single variable, like years or months.
# Each heading is now followed by the table it announces; previously the
# grouped results were computed and dropped.
print('summary by years')
ry = mlab.rec_groupby(rsum, ('years', ), stats)
print(mlab.rec2txt(ry))

print('summary by months')
rm = mlab.rec_groupby(rsum, ('months', ), stats)
print(mlab.rec2txt(rm))

# or over multiple variables like years and months
print('summary by year and month')
rym = mlab.rec_groupby(rsum, ('years', 'months'), stats)
print(mlab.rec2txt(rym))

print('summary by volume')
rv = mlab.rec_groupby(rsum, ('volcode', ), stats)
print(mlab.rec2txt(rv))
Ejemplo n.º 9
    rall = np.load(os.path.join(outdir,

# Quick-look plots: daily return against price, and the price histogram.
if 1:
    fig = plt.figure()
    plt.plot(rall.price, rall.dreturn, 'o')

    fig = plt.figure()
    plt.hist(rall.price, 20)

# One summary row per price bucket; each bucket is the half-open
# interval (price_min, price_max].
data = []
ranges = [(0, 1), (1, 2), (2, 5), (5, 10), (10, 20), (20, 100), (100, np.inf)]
for price_min, price_max in ranges:
    mask = (rall.price > price_min) & (rall.price <= price_max)
    rmask = rall[mask]
    prices = rmask.price
    dreturn = rmask.dreturn * 100  # scale the returns by 100
    median = np.median(dreturn)
    mean = np.mean(dreturn)
    count = len(dreturn)
    std = np.std(dreturn)
    rar = mean / std  # mean return per unit of standard deviation

    data.append((price_min, price_max, count, mean, median, std, rar))

rsummary = np.rec.fromrecords(
    data, names='price_min,price_max,count,mean,median,std,rar')

# print(...) with one argument works on Python 2 and Python 3 alike.
print(mlab.rec2txt(rsummary))
Ejemplo n.º 10
"""
Demonstrate how get_sample_data works with git revisions in the data.

To try it, clone the sample data repository

    git clone git@github.com:matplotlib/sample_data.git

and edit testdata.csv to add a new row.  After committing the changes,
when you rerun this script you will get the updated data (and the new
git version will be cached in ~/.matplotlib/sample_data).
"""

import matplotlib.mlab as mlab
import matplotlib.cbook as cbook

# get the file handle to the cached data and print the contents
datafile = 'testdir/subdir/testsub.csv'
fh = cbook.get_sample_data(datafile)
print(fh.read())

# read() leaves the handle at end-of-file; rewind it, otherwise csv2rec
# would be handed an exhausted stream.
fh.seek(0)

# make sure we can read it using csv2rec
r = mlab.csv2rec(fh)

print(mlab.rec2txt(r))


Ejemplo n.º 11
def analyze_yahoo_data(ticker, date1=datetime.date(2004, 1, 1), date2=None):
    """
    Load data for *ticker* and print two ``estimate_var`` result tables.

    The data is fetched with ``load_yahoo_data(ticker, date1, date2)``;
    ``estimate_var`` is then run once with its defaults and once with
    ``True`` as second argument (presumably a per-year breakdown, going
    by the variable name -- confirm against ``estimate_var``).  Both
    results are printed through ``rec2txt``.

    @param ticker: Ticker symbol passed on to ``load_yahoo_data``.
    @param date1: First date of the range (default: 2004-01-01).
    @type date1: datetime.date
    @param date2: Last date of the range.  ``None`` (the default) means
                  "today", evaluated on every call rather than once when
                  the function is defined.
    @type date2: datetime.date or None
    """
    if date2 is None:
        date2 = datetime.date.today()
    # Only the data table is needed; the first two items are ignored.
    _ticker, _iso_week, data = load_yahoo_data(ticker, date1, date2)
    result_all = estimate_var(data)
    result_year = estimate_var(data, True)
    print(rec2txt(result_all))
    print(rec2txt(result_year))
Ejemplo n.º 12
def analyze_dzh_file(filename):
    """
    Load *filename* and print two ``estimate_var`` result tables.

    The file is read with ``load_file``; ``estimate_var`` is then run once
    with its defaults and once with ``True`` as second argument
    (presumably a per-year breakdown, going by the variable name --
    confirm against ``estimate_var``).  Both results are printed through
    ``rec2txt``.

    @param filename: Path handed to ``load_file``.
    """
    # Only the data table is needed; the first two items are ignored.
    _ticker, _iso_week, data = load_file(filename)
    result_all = estimate_var(data)
    result_year = estimate_var(data, True)
    # Parenthesised print works on both Python 2 and Python 3.
    print(rec2txt(result_all))
    print(rec2txt(result_year))
Ejemplo n.º 13
"""
Demonstrate how get_sample_data works with git revisions in the data.

To try it, clone the sample data repository

    git clone git@github.com:matplotlib/sample_data.git

and edit testdata.csv to add a new row.  After committing the changes,
when you rerun this script you will get the updated data (and the new
git version will be cached in ~/.matplotlib/sample_data).
"""

from __future__ import print_function
import matplotlib.mlab as mlab
import matplotlib.cbook as cbook

# get the file handle to the cached data and print the contents
datafile = 'testdir/subdir/testsub.csv'
fh = cbook.get_sample_data(datafile)
print(fh.read())

# read() leaves the handle at end-of-file; rewind it before parsing.
fh.seek(0)

# make sure we can read it using csv2rec
r = mlab.csv2rec(fh)

# Show the parsed records so the result of csv2rec is actually checked.
print(mlab.rec2txt(r))



Ejemplo n.º 14
# Load the combined table and expose its fields as attributes.
if 1:
    rall = np.load(os.path.join(outdir, 'stocks_combined.npy')).view(np.recarray)

# Quick-look plots: daily return against price, and the price histogram.
if 1:
    fig = plt.figure()
    plt.plot(rall.price, rall.dreturn, 'o')

    fig = plt.figure()
    plt.hist(rall.price, 20)

# One summary row per price bucket; each bucket is the half-open
# interval (price_min, price_max].
data = []
ranges = [(0, 1), (1, 2), (2, 5), (5, 10), (10, 20), (20, 100), (100, np.inf)]
for price_min, price_max in ranges:
    mask = (rall.price > price_min) & (rall.price <= price_max)
    rmask = rall[mask]
    prices = rmask.price
    dreturn = rmask.dreturn * 100  # scale the returns by 100
    median = np.median(dreturn)
    mean = np.mean(dreturn)
    count = len(dreturn)
    std = np.std(dreturn)
    rar = mean / std  # mean return per unit of standard deviation

    data.append((price_min, price_max, count, mean, median, std, rar))

rsummary = np.rec.fromrecords(data, names='price_min,price_max,count,mean,median,std,rar')

# print(...) with one argument works on Python 2 and Python 3 alike.
print(mlab.rec2txt(rsummary))
Ejemplo n.º 15
rsum = mlab.rec_summarize(r, summaryfuncs)

# stats is a sequence of (dtype_name, function, output_dtype_name).
# rec_groupby will summarize the attribute identified by the
# dtype_name over the groups in the groupby list, and assign the
# result to the output_dtype_name
stats = (
    ('dreturn', len, 'rcnt'),
    ('dreturn', np.mean, 'rmean'),
    ('dreturn', np.median, 'rmedian'),
    ('dreturn', np.std, 'rsigma'),
)  # closing parenthesis was missing

# you can summarize over a single variable, like years or months.
# Every grouping is printed the same way the 'years' one already was.
print('summary by years')
ry = mlab.rec_groupby(rsum, ('years',), stats)
print(mlab.rec2txt(ry))

print('summary by months')
rm = mlab.rec_groupby(rsum, ('months',), stats)
print(mlab.rec2txt(rm))

# or over multiple variables like years and months
print('summary by year and month')
rym = mlab.rec_groupby(rsum, ('years', 'months'), stats)
print(mlab.rec2txt(rym))

print('summary by volume')
rv = mlab.rec_groupby(rsum, ('volcode',), stats)
print(mlab.rec2txt(rv))
Ejemplo n.º 16
def volume_code(volume):
    """Map continuous volume values onto integer bin codes.

    Each value is replaced by the index at which it would be inserted
    into the bin edges below (``np.searchsorted`` with its default
    left-side rule).  Note that ``10e6`` and ``1e7`` are the same number,
    so the last two edges coincide and code 4 is never produced.
    """
    bin_edges = [1e5, 1e6, 5e6, 10e6, 1e7]
    return np.searchsorted(bin_edges, volume)
# Each entry names an input field, the function applied to that field and
# the name under which the result is stored.
summaryfuncs = (
    ('date', lambda x: [thisdate.year for thisdate in x], 'years'),
    ('date', lambda x: [thisdate.month for thisdate in x], 'months'),
    ('date', lambda x: [thisdate.weekday() for thisdate in x], 'weekday'),
    ('adj_close', daily_return, 'dreturn'),
    ('volume', volume_code, 'volcode'),
)  # closing parenthesis was missing
rsum = mlab.rec_summarize(r, summaryfuncs)

# Statistics of the daily return computed for every group.
stats = (
    ('dreturn', len, 'rcnt'),
    ('dreturn', np.mean, 'rmean'),
    ('dreturn', np.median, 'rmedian'),
    ('dreturn', np.std, 'rsigma'),
)  # closing parenthesis was missing

# Single-argument print(...) is valid on both Python 2 and Python 3.
print('summary by years')
ry = mlab.rec_groupby(rsum, ('years',), stats)
print(mlab.rec2txt(ry))
print('summary by months')
rm = mlab.rec_groupby(rsum, ('months',), stats)
print(mlab.rec2txt(rm))
print('summary by year and month')
rym = mlab.rec_groupby(rsum, ('years', 'months'), stats)
print(mlab.rec2txt(rym))
print('summary by volume')
rv = mlab.rec_groupby(rsum, ('volcode',), stats)
print(mlab.rec2txt(rv))
Ejemplo n.º 17
import numpy as np
import matplotlib.mlab as mlab

r = mlab.csv2rec('../data/aapl.csv')
# Left-hand table: the last ten rows.
r1 = r[-10:]

# Create a new array: twelve rows taken from r[-17:-5], so it shares
# rows -10..-6 with r1 and carries an extra 'marker' column.
# 'O' is the portable spelling of the object dtype ('|O4' pins the item
# size to 4 bytes); the builtin float replaces the removed np.float alias.
r2 = np.empty(12,
              dtype=[('date', 'O'), ('high', float),
                     ('marker', float)])
r2 = r2.view(np.recarray)
r2.date = r.date[-17:-5]
r2.high = r.high[-17:-5]
r2.marker = np.arange(12)

print("r1:")
print(mlab.rec2txt(r1))
print("r2:")
print(mlab.rec2txt(r2))

# Passed to rec_join as its ``defaults`` argument.
# np.nan exists in every NumPy release (np.NaN was removed in NumPy 2.0).
defaults = {'marker': -1, 'close': np.nan, 'low': -4444.}

for s in ('inner', 'outer', 'leftouter'):
    # The call was cut off after the key list; both tables, the join type
    # and the defaults are passed explicitly.
    rec = mlab.rec_join(['date', 'high'], r1, r2,
                        jointype=s, defaults=defaults)
    print("\n%sjoin :\n%s" % (s, mlab.rec2txt(rec)))
Ejemplo n.º 18
import numpy as np
import matplotlib.mlab as mlab

r = mlab.csv2rec('../data/aapl.csv')
# Left-hand table: the last ten rows.
r1 = r[-10:]

# Create a new array: twelve rows taken from r[-17:-5], so it shares
# rows -10..-6 with r1 and carries an extra 'marker' column.
# 'O' is the portable spelling of the object dtype ('|O4' pins the item
# size to 4 bytes); the builtin float replaces the removed np.float alias.
r2 = np.empty(12, dtype=[('date', 'O'), ('high', float),
                         ('marker', float)])
r2 = r2.view(np.recarray)
r2.date = r.date[-17:-5]
r2.high = r.high[-17:-5]
r2.marker = np.arange(12)

# Single-argument print(...) is valid on both Python 2 and Python 3.
print("r1:")
print(mlab.rec2txt(r1))
print("r2:")
print(mlab.rec2txt(r2))

# Passed to rec_join as its ``defaults`` argument.
# np.nan exists in every NumPy release (np.NaN was removed in NumPy 2.0).
defaults = {'marker': -1, 'close': np.nan, 'low': -4444.}

for s in ('inner', 'outer', 'leftouter'):
    rec = mlab.rec_join(['date', 'high'], r1, r2,
                        jointype=s, defaults=defaults)
    print("\n%sjoin :\n%s" % (s, mlab.rec2txt(rec)))
Ejemplo n.º 19
def robustSVD(D, nMode=nMode, sigOut=sigOut, maxIt=maxIt, verbose=True):
    r"""
    Robust SVD

    D = U S V.T  (V is returned in the row-wise form produced by
    ``np.linalg.svd``)

    This decomposition is computed using SVD.  SVD is sensitive to
    outliers, so we perform iterative sigma clipping in the `\chi^2`
    sense, but also in the distribution of best fit parameters.

    D_fit[i] = a_1 V_1 + a_2 V_2 + ... a_nMode V_nMode

    nMode is the (small) number of principal components we wish to fit
    our data with.

    If any of the a_j, or \chi^2_i = sum((D_fit[i] - D[i])**2)/Dncol
    (Dncol being the number of columns of D), is an outlier, remove that
    row from D and fit again.

    Parameters
    ----------
    D      : Data matrix.  1-D vectors stacked vertically (row-wise). May
             not contain nans, or masked values.  The input is copied,
             not modified.
    nMode  : Number of modes.  Defaults to the module-level ``nMode``.
    sigOut : Clip coefficients whose deviation from the median, in units
             of ``rL['mad']`` as returned by ``moments``, exceeds sigOut.
             Defaults to the module-level ``sigOut``.
    maxIt  : Maximum number of clipping passes before exiting.
             Defaults to the module-level ``maxIt``.
    verbose : Print the coefficient moments on every pass and a summary
             on the final one.

    Returns
    -------
    U, S, V : Factors of the last fit; S holds the singular values on
              its diagonal and has the shape of V.
    goodid  : Indices (into the rows of the input D) of the rows that
              carry no outlier flag after the last pass.
    X2      : \chi^2 of every row that took part in the last fit.
    """
    Dnrow, Dncol = D.shape
    D = D.copy()
    gRow = np.ones(Dnrow, dtype=bool)  # Good rows (not outliers)

    goodid = np.arange(Dnrow)

    # Iterate SVD fits.
    count = 0
    finished = False
    while not finished:
        # Drop the rows flagged on the previous pass (none on the first).
        D = D[gRow]
        Dnrow, Dncol = D.shape

        U, s, V = np.linalg.svd(D, full_matrices=False)
        S = np.zeros(V.shape)
        S[:Dnrow, :Dnrow] = np.diag(s)
        A = np.dot(U, S)             # A is matrix of best fit coeff
        A = A[:, :nMode]
        Dfit = np.dot(A, V[:nMode])  # Dfit is D represented by a
                                     # truncated series of modes
        # Evaluate Chi2
        X2 = np.sum((Dfit - D)**2, axis=1) / Dncol

        rL = moments(A)

        if verbose:
            print("Moments of principle component weight")
            # The heading used to be printed with nothing after it.
            print(rL)

        # Determine which rows of D are outliers
        dev = (A - rL['med']) / rL['mad']

        # One flag per (row, mode): coefficient further than sigOut away
        Aout = abs(dev) > sigOut

        # One flag per row, as a column vector: Chi2 outside [0.5, 3]
        Xout = (X2 > 3) | (X2 < 0.5)
        Xout = Xout.reshape(Xout.size, 1)
        out = np.hstack([Xout, Aout])

        # A row is good only if none of its flags is set
        gRow = out.astype(int).sum(axis=1) == 0

        # Stop when there are no outliers or the pass limit is reached;
        # otherwise clip off the outliers and repeat.
        if gRow.all() or (count >= maxIt):
            finished = True

            if verbose:
                names = ['ID'] + ['Chi2'] + ['a%i' % (i + 1) for i in range(nMode)]
                dtype = list(zip(names, [float] * len(names)))
                routData = np.hstack([np.vstack(goodid), np.vstack(X2), dev])
                routData = [tuple(row) for row in routData]
                rout = np.array(routData, dtype=dtype)

                print("First 10 a/MAD(a)")
                print(rout[:10])
                print("%i there are %i outliers " % (count, goodid[~gRow].size))

        # Keep goodid aligned with the rows that survive into the next
        # pass.  Previously this ran only on the final pass, so the ids
        # went out of step with D as soon as a row was clipped.
        goodid = goodid[gRow]
        # Previously never incremented, so the maxIt limit could not fire.
        count += 1

    return U, S, V, goodid, X2