def __init__(self, models):
    self.models = models
    self.names = sorted(models.keys())

    mod = self.models.values()[0]

    self.pred_like = DataMatrix(
        dict((k, v.pred_like) for k, v in models.iteritems()),
        index=mod.dates)
def _read_matrix(group):
    index = _read_index(group, 'index')
    columns = _read_index(group, 'columns')
    values = group.values[:]

    objects = None
    if hasattr(group, 'obj_columns'):
        # the object-dtype block is stored under its own 'obj_columns' node
        obj_columns = _read_index(group, 'obj_columns')
        obj_values = group.obj_values[:]
        objects = DataMatrix(obj_values, index=index, columns=obj_columns)

    return DataMatrix(values, index=index, columns=columns, objects=objects)
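# A hedged usage sketch (PyTables 2.x-era API, not from the original source):
# _read_matrix expects a PyTables group holding 'index', 'columns' and
# 'values' nodes; _read_index is assumed to be defined alongside it, and the
# file name and group path below are hypothetical.
import tables

h5file = tables.openFile('store.h5', mode='r')
frame = _read_matrix(h5file.getNode('/df'))
h5file.close()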
def getQuotes(symbol, start, end):
    quotes = fin.quotes_historical_yahoo(symbol, start, end)
    dates, open, close, high, low, volume = zip(*quotes)

    data = {
        'open': open,
        'close': close,
        'high': high,
        'low': low,
        'volume': volume
    }

    dates = Index([datetime.fromordinal(int(d)) for d in dates])
    return DataMatrix(data, index=dates)
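# The function above presumably relies on imports along these lines; this is
# a hedged sketch for the pandas / matplotlib APIs of that era, not code from
# the original source, and the ticker and date range below are hypothetical.
from datetime import datetime

import matplotlib.finance as fin        # provides quotes_historical_yahoo
from pandas import DataMatrix, Index

startDate = datetime(2009, 1, 1)
endDate = datetime(2009, 12, 31)
spy = getQuotes('SPY', startDate, endDate)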
def __init__(self, phi):
    self.phi = phi
    self.p = len(phi)

    (self.modulus, self.frequency,
     self.wavelength, self.H) = self._compute_decomp()

    self.result = DataMatrix(
        {'wavelength': self.wavelength,
         'modulus': self.modulus,
         'frequency': self.frequency},
        columns=['modulus', 'wavelength', 'frequency'])
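# A minimal sketch of what _compute_decomp presumably does (assumption: the
# usual AR eigen-decomposition, cf. West & Harrison): the reciprocal roots of
# the AR characteristic polynomial give, for each complex-conjugate pair, a
# modulus r and frequency omega, hence a wavelength 2*pi/omega.
import numpy as np

phi = np.array([1.5, -0.75])            # hypothetical AR(2) coefficients
roots = np.roots(np.r_[1, -phi])        # reciprocal characteristic roots
modulus = np.abs(roots)                 # ~0.866 for both conjugate roots
frequency = np.abs(np.angle(roots))     # ~pi/6 radians
wavelength = 2 * np.pi / frequency      # ~12 periods per cycle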
class DLMMixture(object):
    """
    Mixture of DLMs (Class I type model)

    Parameters
    ----------
    models : dict

    Notes
    -----
    cf. W&H Section 12.2
    """
    def __init__(self, models):
        self.models = models
        self.names = sorted(models.keys())

        mod = self.models.values()[0]

        self.pred_like = DataMatrix(
            dict((k, v.pred_like) for k, v in models.iteritems()),
            index=mod.dates)

    @property
    def post_model_prob(self):
        cumprod = self.pred_like.cumprod()
        return cumprod / cumprod.sum(1)

    def plot_post_prob(self):
        ratio = self.post_model_prob
        ratio.plot(subplots=True, sharey=True)
        ax = plt.gca()
        ax.set_ylim([0, 1])

    def get_weights(self, t):
        weights = self.post_model_prob
        return weights.xs(weights.index[t])

    def plot_mu_density(self, t, index=0, support_thresh=0.1):
        """
        Plot posterior densities for a single model parameter over the set of
        mixture components

        Parameters
        ----------
        t : int
            time index, relative to response variable
        index : int
            parameter index to plot

        Notes
        -----
        cf. West & Harrison Figure 12.3. Automatically annotating individual
        component curves would probably be difficult.
        """
        ix = index
        dists = {}
        for name in self.names:
            model = self.models[name]
            df = model.df[t]
            mode = model.mu_mode[t + 1, ix]
            scale = np.sqrt(model.mu_scale[t + 1, ix, ix])
            dists[name] = stats.t(df, loc=mode, scale=scale)

        plot_mixture(dists, self.get_weights(t),
                     support_thresh=support_thresh)

    def plot_forc_density(self, t, support_thresh=0.1):
        """
        Plot posterior densities for 1-step forecasts

        Parameters
        ----------
        t : int
            time index, relative to response variable

        Notes
        -----
        cf. West & Harrison Figure 12.4.
        """
        dists = {}
        for name in self.names:
            model = self.models[name]
            df = model.df[t]
            mode = model.forecast[t]
            scale = np.sqrt(model.forc_var[t])
            dists[name] = stats.t(df, loc=mode, scale=scale)

        plot_mixture(dists, self.get_weights(t),
                     support_thresh=support_thresh)
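# A minimal usage sketch, not from the original source: dlm_a and dlm_b stand
# in for two already-fitted DLM objects exposing the attributes the class
# reads (pred_like, dates, df, mu_mode, mu_scale, forecast, forc_var).
mixture = DLMMixture({'model A': dlm_a, 'model B': dlm_b})
mixture.plot_post_prob()                 # posterior model probabilities over time
mixture.plot_mu_density(t=20, index=0)   # component densities for one parameter
mixture.plot_forc_density(t=20)          # one-step-ahead forecast densities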
# Set start and end dates
t = map(int, sys.argv[1].split('-'))
startday = dt.datetime(t[2], t[0], t[1])
t = map(int, sys.argv[2].split('-'))
endday = dt.datetime(t[2], t[0], t[1])

# Get desired timestamps (NYSE close, 4pm)
timeofday = dt.timedelta(hours=16)
timestamps = du.getNYSEdays(startday, endday, timeofday)

# Get the data from the data store
# (`symbols` is assumed to have been defined earlier as a list of tickers)
dataobj = da.DataAccess('Norgate')
historic = dataobj.get_data(timestamps, symbols, "close")

# Setup the allocation table
alloc_val = random.random()
alloc = DataMatrix(index=[historic.index[0]], data=[alloc_val],
                   columns=symbols)
for date in range(1, len(historic.index)):
    alloc_val = 1  # random.random()
    alloc = alloc.append(
        DataMatrix(index=[historic.index[date]], data=[alloc_val],
                   columns=[symbols[0]]))
alloc['_CASH'] = 1 - alloc[symbols[0]]

# Dump to pkl file
output = open(sys.argv[3], "wb")
cPickle.dump(alloc, output)
output.close()
def getQuotes(symbol, start, end):
    quotes = fin.quotes_historical_yahoo(symbol, start, end)
    dates, open, close, high, low, volume = zip(*quotes)

    data = {"open": open, "close": close, "high": high,
            "low": low, "volume": volume}

    dates = Index([datetime.fromordinal(int(d)) for d in dates])
    return DataMatrix(data, index=dates)

# startDate and endDate are assumed to be datetime objects defined earlier
msft = getQuotes("MSFT", startDate, endDate)
aapl = getQuotes("AAPL", startDate, endDate)
goog = getQuotes("GOOG", startDate, endDate)
ibm = getQuotes("IBM", startDate, endDate)

px = DataMatrix({"MSFT": msft["close"],
                 "IBM": ibm["close"],
                 "GOOG": goog["close"],
                 "AAPL": aapl["close"]})
returns = px / px.shift(1) - 1

# Select dates
subIndex = ibm.index[(ibm["close"] > 95) & (ibm["close"] < 100)]
msftOnSameDates = msft.reindex(subIndex)

# Insert columns
msft["hi-lo spread"] = msft["high"] - msft["low"]
ibm["hi-lo spread"] = ibm["high"] - ibm["low"]

# Aggregate monthly
print "Running One Stock strategy from "+sys.argv[1] +" to "+sys.argv[2] # Use google symbol symbols = list(['SPY']) # Set start and end dates t = map(int,sys.argv[1].split('-')) startday = dt.datetime(t[2],t[0],t[1]) t = map(int,sys.argv[2].split('-')) endday = dt.datetime(t[2],t[0],t[1]) # Get desired timestamps timeofday=dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday,endday,timeofday) # Get the data from the data store dataobj = da.DataAccess('Norgate') historic = dataobj.get_data(timestamps, symbols, "close") # Setup the allocation table alloc_val= random.random() alloc=DataMatrix(index=[historic.index[0]], data=[alloc_val], columns=symbols) for date in range(1, len(historic.index)): alloc_val=1 #random.random() alloc=alloc.append(DataMatrix(index=[historic.index[date]], data=[alloc_val], columns=[symbols[0]])) alloc['_CASH']=1-alloc[symbols[0]] # Dump to pkl file output=open(sys.argv[3],"wb") cPickle.dump(alloc, output)
ibm['hi-lo spread'] = ibm['high'] - ibm['low']

# Aggregate monthly
def toMonthly(frame, how):
    offset = BMonthEnd()
    return frame.groupby(offset.rollforward).aggregate(how)

msftMonthly = toMonthly(msft, np.mean)
ibmMonthly = toMonthly(ibm, np.mean)

# Statistics
stdev = DataMatrix({'MSFT': msft.std(),
                    'IBM': ibm.std()})

# Arithmetic
ratios = ibm / msft

# Works with different indices
ratio = ibm / ibmMonthly
monthlyRatio = ratio.reindex(ibmMonthly.index)

# Ratio relative to past month average
filledRatio = ibm / ibmMonthly.reindex(ibm.index, fillMethod='pad')
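# A small follow-on sketch reusing only objects defined above: within-month
# dispersion via the same toMonthly() helper, and month-over-month returns
# using the shift() idiom from the daily-returns example.
msftMonthlyVol = toMonthly(msft, np.std)
ibmMonthlyVol = toMonthly(ibm, np.std)
ibmMonthlyReturns = ibmMonthly / ibmMonthly.shift(1) - 1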