コード例 #1
0
ファイル: test_concat.py プロジェクト: RahulHP/pandas
    def test_panel_join_many(self):
        """Joining a list of panel slices should reconstruct the original
        panel; partial slices should match the Panel.from_dict equivalent."""
        # Temporarily enlarge the generated test panel, then restore default.
        tm.K = 10
        panel = tm.makePanel()
        tm.K = 4

        pieces = [panel.ix[:2], panel.ix[2:6], panel.ix[6:]]
        rejoined = pieces[0].join(pieces[1:])
        tm.assert_panel_equal(rejoined, panel)

        # Slices that also trim the major axis, so inner/outer joins differ.
        pieces = [panel.ix[:2, :-5], panel.ix[2:6, 2:], panel.ix[6:, 5:-7]]

        combined = {}
        for piece in pieces:
            combined.update(piece.iteritems())

        rejoined = pieces[0].join(pieces[1:], how='inner')
        tm.assert_panel_equal(rejoined,
                              Panel.from_dict(combined, intersect=True))

        rejoined = pieces[0].join(pieces[1:], how='outer')
        tm.assert_panel_equal(rejoined,
                              Panel.from_dict(combined, intersect=False))

        # edge cases: suffixes and how='right' are rejected for multi-joins
        self.assertRaises(ValueError, pieces[0].join, pieces[1:],
                          how='outer', lsuffix='foo', rsuffix='bar')
        self.assertRaises(ValueError, pieces[0].join, pieces[1:], how='right')
コード例 #2
0
ファイル: databot.py プロジェクト: Mark1988huang/ppQuanTrade
 def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
     ''' Return a quote panel built from local Yahoo CSV data.

     :param tickers: iterable of ticker names to load
     :param index: DatetimeIndex-like; its first/last entries bound the
                   NYSE trading days requested
     :param fields: quote fields to retrieve (default: Fields.QUOTES)
     :param kwargs: reverse (bool) -> minor-oriented panel,
                    verbose (bool),
                    symbols (dict) -> fallback ticker codes when no
                    database connection is available
     '''
     #TODO Replace adj_close with actual_close
     #TODO Add reindex methods, and start, end, delta parameters
     reverse = kwargs.get('reverse', False)
     verbose = kwargs.get('verbose', False)
     # BUG FIX: 'symbols' used to be read before any assignment whenever the
     # database was not connected, raising UnboundLocalError. Accept it from
     # kwargs as a fallback, consistent with getQuotes().
     symbols = kwargs.get('symbols', None)
     if self.connected['database']:
         symbols, markets = self.db.getTickersCodes(tickers)
     elif not symbols:
         self._logger.error('** No database neither informations provided')
         return None
     timestamps = du.getNYSEdays(index[0], index[-1], dt.timedelta(hours=16))
     csv = da.DataAccess('Yahoo')
     df = csv.get_data(timestamps, symbols.values(), fields, verbose=verbose)
     quotes_dict = dict()
     for ticker in tickers:
         j = 0
         quotes_dict[ticker] = dict()
         for field in fields:
             # Downsample to the index frequency, then forward-fill holes.
             serie = df[j][symbols[ticker]].groupby(index.freq.rollforward).aggregate(np.mean)
             #TODO add a function parameter to decide what to do about it
             clean_serie = serie.fillna(method='pad')
             quotes_dict[ticker][field] = clean_serie
             j += 1
     if reverse:
         return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
     return Panel.from_dict(quotes_dict, intersect=True)
コード例 #3
0
 def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
     ''' Return a quote panel built from local Yahoo CSV data.

     :param tickers: iterable of ticker names to load
     :param index: DatetimeIndex-like; first/last entries bound the
                   NYSE trading days requested
     :param fields: quote fields to retrieve (default: Fields.QUOTES)
     :param kwargs: reverse (bool) -> minor-oriented panel,
                    verbose (bool),
                    symbols (dict) -> fallback ticker codes when no
                    database connection is available
     '''
     #TODO Replace adj_close with actual_close
     #TODO Add reindex methods, and start, end, delta parameters
     reverse = kwargs.get('reverse', False)
     verbose = kwargs.get('verbose', False)
     # BUG FIX: 'symbols' used to be read before any assignment whenever the
     # database was not connected, raising UnboundLocalError. Accept it from
     # kwargs as a fallback, consistent with getQuotes().
     symbols = kwargs.get('symbols', None)
     if self.connected['database']:
         symbols, markets = self.db.getTickersCodes(tickers)
     elif not symbols:
         self._logger.error('** No database neither informations provided')
         return None
     timestamps = du.getNYSEdays(index[0], index[-1],
                                 dt.timedelta(hours=16))
     csv = da.DataAccess('Yahoo')
     df = csv.get_data(timestamps,
                       symbols.values(),
                       fields,
                       verbose=verbose)
     quotes_dict = dict()
     for ticker in tickers:
         j = 0
         quotes_dict[ticker] = dict()
         for field in fields:
             # Downsample to the index frequency, then forward-fill holes.
             serie = df[j][symbols[ticker]].groupby(
                 index.freq.rollforward).aggregate(np.mean)
             #TODO add a function parameter to decide what to do about it
             clean_serie = serie.fillna(method='pad')
             quotes_dict[ticker][field] = clean_serie
             j += 1
     if reverse:
         return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
     return Panel.from_dict(quotes_dict, intersect=True)
コード例 #4
0
ファイル: test_concat.py プロジェクト: SKNIRBHAY/TechWise-1
    def test_panel_join_many(self):
        """Joining a list of panel slices should reconstruct the original
        panel; partial slices should match the Panel.from_dict equivalent."""
        # Temporarily enlarge the generated test panel, then restore default.
        tm.K = 10
        panel = tm.makePanel()
        tm.K = 4

        pieces = [panel.ix[:2], panel.ix[2:6], panel.ix[6:]]
        rejoined = pieces[0].join(pieces[1:])
        tm.assert_panel_equal(rejoined, panel)

        # Slices that also trim the major axis, so inner/outer joins differ.
        pieces = [panel.ix[:2, :-5], panel.ix[2:6, 2:], panel.ix[6:, 5:-7]]

        combined = {}
        for piece in pieces:
            combined.update(piece.iteritems())

        rejoined = pieces[0].join(pieces[1:], how='inner')
        tm.assert_panel_equal(rejoined,
                              Panel.from_dict(combined, intersect=True))

        rejoined = pieces[0].join(pieces[1:], how='outer')
        tm.assert_panel_equal(rejoined,
                              Panel.from_dict(combined, intersect=False))

        # edge cases: suffixes and how='right' are rejected for multi-joins
        self.assertRaises(ValueError, pieces[0].join, pieces[1:],
                          how='outer', lsuffix='foo', rsuffix='bar')
        self.assertRaises(ValueError, pieces[0].join, pieces[1:], how='right')
コード例 #5
0
 def get_divergence_scores_panel(self, divergence=divergence_metrics.JensenShannonDivergence):
     """Score each (experiment, query, feature) sampled distribution against
     the population distribution with the given divergence metric.

     Returns a minor-oriented Panel of divergence scores.
     """
     exp_panel = Panel.from_dict(self.sampler_dic)
     scores = {}
     for exp_no in exp_panel.items:
         per_query = {}
         for query in exp_panel.major_axis:
             per_feature = {}
             for feature in exp_panel.minor_axis:
                 sampled = exp_panel.ix[exp_no, query, feature]
                 # population reference distribution for this feature
                 reference = self.pop_dic[feature]
                 per_feature[feature] = divergence.compute(reference, sampled)
             per_query[query] = Series(per_feature)
         scores[exp_no] = DataFrame(per_query)
     return Panel.from_dict(scores, orient='minor')
コード例 #6
0
ファイル: moments.py プロジェクト: PhE/pandas
def rolling_corr_pairwise(df, window, min_periods=None):
    """
    Computes pairwise rolling correlation matrices as Panel whose items are
    dates

    Parameters
    ----------
    df : DataFrame
    window : int
    min_periods : int, default None

    Returns
    -------
    correls : Panel
    """
    from pandas import Panel
    from collections import defaultdict

    results = defaultdict(dict)
    cols = df.columns

    # Correlation is symmetric, so only compute the upper triangle and
    # mirror each result into both cells.
    for i, left in enumerate(cols):
        for right in cols[i:]:
            pair_corr = rolling_corr(df[left], df[right], window,
                                     min_periods=min_periods)
            results[left][right] = pair_corr
            results[right][left] = pair_corr

    return Panel.from_dict(results).swapaxes('items', 'major')
コード例 #7
0
ファイル: moments.py プロジェクト: tlperkins/pandas
def rolling_corr_pairwise(df, window, min_periods=None):
    """
    Computes pairwise rolling correlation matrices as Panel whose items are
    dates

    Parameters
    ----------
    df : DataFrame
    window : int
    min_periods : int, default None

    Returns
    -------
    correls : Panel
    """
    from pandas import Panel
    from collections import defaultdict

    results = defaultdict(dict)
    cols = df.columns

    # Correlation is symmetric: compute the upper triangle only and mirror
    # each rolling series into both (k1, k2) and (k2, k1).
    for i, left in enumerate(cols):
        for right in cols[i:]:
            pair_corr = rolling_corr(df[left], df[right], window,
                                     min_periods=min_periods)
            results[left][right] = pair_corr
            results[right][left] = pair_corr

    return Panel.from_dict(results).swapaxes('items', 'major')
コード例 #8
0
ファイル: simparser.py プロジェクト: aitatanit/PAME
    def primary_panel(self, minor_axis=None, prefix=None):
        """ Returns primary as a Panel if possible, if fails, raises warning
        and returns as dict.

        Parameters
        ----------
        minor_axis : str or int, optional
            If a string, treated as a key of self.inputs whose values
            relabel the panel's minor axis. If an int, the axis is left
            unchanged. Any other type raises SimParserError.
        prefix : str, optional
            When relabeling, each new label becomes '%s%.4f' % (prefix, v).
        """
        primary_of_df = {
        }  #Create a primary of dataframes, so has to convert all values to DF's
        ignoring = []  # If can't convert a value to df, let user know

        try:
            wavelengths = self.static[
                globalparms.spectralparameters]['lambdas']
        except Exception:
            logging.warning(
                'Could not find lambdas in self.static, primary panel will'
                ' not be indexed by wavelength...')
            wavelengths = None

        # Try to convert to dataframes.   If fails on one step, should fail on all the steps
        for step, data in self.primary.items():
            primary_of_df[step] = DataFrame(data, index=wavelengths)

        # Panel with as simulation variabless as major axis (ie A_avg, R_0)
        outpanel = Panel.from_dict(primary_of_df, orient='minor')

        # Sort Items alphabetically (R_avg, R_0, R_1, T_avg, ...)
        outpanel = outpanel.reindex_axis(sorted(
            outpanel.items))  #<-- Sorted items alphabetically

        # Sort Minor axis with integer suffix (step_0, step_1, step_2)
        # http://stackoverflow.com/questions/4287209/sort-list-of-strings-by-integer-suffix-in-python
        outpanel = outpanel.reindex_axis(
            putil.stepsort(outpanel.minor_axis),
            axis=2,  #items axis
            copy=False)  #Save memory

        # REORIENTATION OF MINOR AXIS LABELS
        if minor_axis:
            newaxis = None
            if isinstance(minor_axis, basestring):
                inputarray = self.inputs[
                    minor_axis]  # values like 50, 60, 70, so want prefix/?
                newaxis = dict(zip(outpanel.minor_axis, inputarray))
                # end of day, want basically {'step_1':'vfrac_0.5, 'step_2', 'vfrac_0.10' ...
                if prefix:
                    # No delimiter (ie %s_%s) because prefix can set that eg prefix = layerd_ or layerd=
                    newaxis = dict((k, '%s%.4f' % (prefix, v))
                                   for k, v in newaxis.items())
            elif isinstance(minor_axis, int):
                pass
            else:
                # BUG FIX: type() was previously called with two positional
                # arguments -- type(minor_axis, self.inputs) -- which raised
                # TypeError instead of the intended SimParserError.
                raise SimParserError(
                    'Can only map strings or integers to primary_panel, get type %s.'
                    ' These should correspond to the keys in %s' %
                    (type(minor_axis), self.inputs))

            # BUG FIX: rename() previously ran unconditionally, so an int
            # minor_axis hit an undefined 'newaxis' and raised NameError.
            if newaxis is not None:
                outpanel = outpanel.rename(minor_axis=newaxis)

        return outpanel
コード例 #9
0
 def apply(self, func, *args, **kwargs):
     """Apply `func` to each item DataFrame's groupby and reassemble the
     per-item results into a Panel.

     `func` may be a callable or the name of a groupby method (e.g. 'mean').
     """
     result = {}
     for key, df in self.obj.iteritems():
         grp = DataFrameGroupBy(df, grouper=self.grouper)
         if not callable(func):
             # func names a groupby method, e.g. 'sum'
             f = getattr(grp, func)
             res = f(*args, **kwargs)
         else:
             # BUG FIX: callables previously fell through with 'res' unbound
             # (NameError); delegate them to GroupBy.apply instead.
             res = grp.apply(func, *args, **kwargs)
         result[key] = res
     return Panel.from_dict(result)
コード例 #10
0
ファイル: groupby.py プロジェクト: milktrader/trtools
 def apply(self, func, *args, **kwargs):
     """Apply `func` to each item DataFrame's groupby and reassemble the
     per-item results into a Panel.

     `func` may be a callable or the name of a groupby method (e.g. 'mean').
     """
     result = {}
     for key, df in self.obj.iteritems():
         grp = DataFrameGroupBy(df, grouper=self.grouper)
         if not callable(func):
             # func names a groupby method, e.g. 'sum'
             f = getattr(grp, func)
             res = f(*args, **kwargs)
         else:
             # BUG FIX: callables previously fell through with 'res' unbound
             # (NameError); delegate them to GroupBy.apply instead.
             res = grp.apply(func, *args, **kwargs)
         result[key] = res
     return Panel.from_dict(result)
コード例 #11
0
ファイル: test_sparse.py プロジェクト: klausz/pandas
        def _check(frame):
            # Stack the frame two ways -- via a dense Panel and via the
            # sparse stacking helper -- and require identical values.
            dense_frame = frame.to_dense()

            panel = Panel.from_dict({"foo": frame})
            dense_stacked = panel.to_frame()

            sparse_stacked = spf.stack_sparse_frame(frame)

            self.assert_(np.array_equal(dense_stacked.values,
                                        sparse_stacked.values))
コード例 #12
0
ファイル: test_sparse.py プロジェクト: theandygross/pandas
        def _check(frame):
            # Stack the frame two ways -- via a dense Panel and via the
            # sparse stacking helper -- and require identical values.
            dense_frame = frame.to_dense()

            panel = Panel.from_dict({'foo': frame})
            dense_stacked = panel.to_frame()

            sparse_stacked = spf.stack_sparse_frame(frame)

            self.assert_(
                np.array_equal(dense_stacked.values, sparse_stacked.values))
コード例 #13
0
ファイル: test_fama_macbeth.py プロジェクト: EmlynC/pandas
    def testFamaMacBethRolling(self):
        # self.checkFamaMacBethExtended('rolling', self.panel_x, self.panel_y,
        #                               nw_lags_beta=2)

        # Same random frame shared across all panel items.
        frame = DataFrame(np.random.randn(50, 10))
        x = Panel.from_dict({key: frame for key in 'abcdefg'})
        y = frame + DataFrame(0.01 * np.random.randn(50, 10))
        self.checkFamaMacBethExtended('rolling', x, y, nw_lags_beta=2)
        self.checkFamaMacBethExtended('expanding', x, y, nw_lags_beta=2)
コード例 #14
0
ファイル: test_frame.py プロジェクト: aechase/pandas
        def _check(frame):
            # Stack via a dense Panel and via the sparse helper; the two
            # resulting value arrays must match.
            dense_frame = frame.to_dense()  # noqa

            panel = Panel.from_dict({'foo': frame})
            dense_stacked = panel.to_frame()

            sparse_stacked = spf.stack_sparse_frame(frame)

            self.assert_numpy_array_equal(dense_stacked.values,
                                          sparse_stacked.values)
コード例 #15
0
    def testFamaMacBethRolling(self):
        # self.checkFamaMacBethExtended('rolling', self.panel_x, self.panel_y,
        #                               nw_lags_beta=2)

        # Same random frame shared across all panel items.
        frame = DataFrame(np.random.randn(50, 10))
        x = Panel.from_dict({key: frame for key in 'abcdefg'})
        y = frame + DataFrame(0.01 * np.random.randn(50, 10))
        self.checkFamaMacBethExtended('rolling', x, y, nw_lags_beta=2)
        self.checkFamaMacBethExtended('expanding', x, y, nw_lags_beta=2)
コード例 #16
0
            def _check(frame):
                # Stack via a dense Panel and via the sparse helper; the
                # two resulting value arrays must match.
                dense_frame = frame.to_dense()  # noqa

                panel = Panel.from_dict({'foo': frame})
                dense_stacked = panel.to_frame()

                sparse_stacked = spf.stack_sparse_frame(frame)

                tm.assert_numpy_array_equal(dense_stacked.values,
                                            sparse_stacked.values)
コード例 #17
0
ファイル: test_fama_macbeth.py プロジェクト: hack-c/pandas
    def testFamaMacBethRolling(self):
        # self.checkFamaMacBethExtended('rolling', self.panel_x, self.panel_y,
        #                               nw_lags_beta=2)

        # df = DataFrame(np.random.randn(50, 10))
        # Note: each panel item gets its own independent random frame here.
        x = Panel.from_dict({key: DataFrame(np.random.randn(50, 10))
                             for key in "abcdefg"})
        y = (DataFrame(np.random.randn(50, 10)) +
             DataFrame(0.01 * np.random.randn(50, 10)))
        self.checkFamaMacBethExtended("rolling", x, y, nw_lags_beta=2)
        self.checkFamaMacBethExtended("expanding", x, y, nw_lags_beta=2)
コード例 #18
0
 def analyze(self):
     """Average the sampled distributions per (query, feature) and save a
     PDF/CDF plot for every query; only the last query's plot is shown."""
     panel=Panel.from_dict(self.sampler_dic)
     # Transpose so queries become the items axis.
     t_panel=panel.transpose(1,0,2)
     num_of_queries= len(t_panel.items)
     for i,query  in enumerate(t_panel.items):
         dic={}
         #for (feature,distribution) in t_panel[query].apply(dict_ops.avg_list_of_dictionaries,axis=0, reduce=False).iteritems():
         #    dic[feature]=distribution          
         # Average each feature's list of distribution dicts into one dict.
         for feature in t_panel[query].columns:
             dic[feature] = dict_ops.avg_list_of_dictionaries(t_panel[query][feature].tolist())
         # File name gets '_<query>' spliced before the extension; show=True
         # only on the final query so earlier figures are saved silently.
         descriptive_stats_plotter.save_pdf_cdf_plot_for_a_single_graph(dic, self.title+'\n('+query+')', statistics_included=True, file_name= self.file_name.rsplit('.',1)[0]+'_'+query+'.'+self.file_name.rsplit('.',1)[1], show=True if i==num_of_queries-1 else False)
         
コード例 #19
0
ファイル: simparser.py プロジェクト: hugadams/PAME
    def primary_panel(self, minor_axis=None, prefix=None):
        """ Returns primary as a Panel if possible, if fails, raises warning
        and returns as dict.

        Parameters
        ----------
        minor_axis : str or int, optional
            If a string, treated as a key of self.inputs whose values
            relabel the panel's minor axis. If an int, the axis is left
            unchanged. Any other type raises SimParserError.
        prefix : str, optional
            When relabeling, each new label becomes '%s%.4f' % (prefix, v).
        """        
        primary_of_df = {} #Create a primary of dataframes, so has to convert all values to DF's
        ignoring = [] # If can't convert a value to df, let user know

        try:
            wavelengths = self.static[globalparms.spectralparameters]['lambdas']
        except Exception:
            logging.warning('Could not find lambdas in self.static, primary panel will'
                            ' not be indexed by wavelength...')
            wavelengths = None

        # Try to convert to dataframes.   If fails on one step, should fail on all the steps
        for step, data in self.primary.items():
            primary_of_df[step] = DataFrame(data, index=wavelengths)
        
        # Panel with as simulation variabless as major axis (ie A_avg, R_0)
        outpanel = Panel.from_dict(primary_of_df, orient='minor')
        
        # Sort Items alphabetically (R_avg, R_0, R_1, T_avg, ...)
        outpanel = outpanel.reindex_axis(sorted(outpanel.items))  #<-- Sorted items alphabetically
        
        # Sort Minor axis with integer suffix (step_0, step_1, step_2)
        # http://stackoverflow.com/questions/4287209/sort-list-of-strings-by-integer-suffix-in-python
        outpanel = outpanel.reindex_axis(putil.stepsort(outpanel.minor_axis),
                                        axis=2, #items axis
                                        copy=False) #Save memory        

        # REORIENTATION OF MINOR AXIS LABELS
        if minor_axis:
            newaxis = None
            if isinstance(minor_axis, basestring):
                inputarray = self.inputs[minor_axis] # values like 50, 60, 70, so want prefix/?
                newaxis = dict(zip(outpanel.minor_axis, inputarray)) 
                # end of day, want basically {'step_1':'vfrac_0.5, 'step_2', 'vfrac_0.10' ...
                if prefix:
                    # No delimiter (ie %s_%s) because prefix can set that eg prefix = layerd_ or layerd=
                    newaxis = dict((k,'%s%.4f' % (prefix, v)) for k, v in newaxis.items())
            elif isinstance(minor_axis, int):
                pass
            else:
                # BUG FIX: type() was previously called with two positional
                # arguments -- type(minor_axis, self.inputs) -- which raised
                # TypeError instead of the intended SimParserError.
                raise SimParserError('Can only map strings or integers to primary_panel, get type %s.'
                   ' These should correspond to the keys in %s' % (type(minor_axis), self.inputs))

            # BUG FIX: rename() previously ran unconditionally, so an int
            # minor_axis hit an undefined 'newaxis' and raised NameError.
            if newaxis is not None:
                outpanel = outpanel.rename(minor_axis = newaxis)
              
        return outpanel
コード例 #20
0
ファイル: ACISLoader.py プロジェクト: bnoon/acis-pandas
def ACISLoader(**params):
    """Fetch climate records from the ACIS web service and return them as a
    Panel of per-station DataFrames (items keyed by station id).

    Keyword params consumed here: 'missing', 'trace', 'subseq' replacement
    values for the sentinel strings "M"/"T"/"S"; 'accum' controls how
    accumulated values (suffix "A") are converted. Remaining params are
    validated by check_params(). Raises TypeError when the service responds
    with an error payload. The returned panel carries station metadata as a
    DataFrame on a 'meta' attribute.
    """
    # validate params
    #   validate elems
    #   calculate timeseries
    cvt_missing = params.pop("missing", "M")
    cvt_trace = params.pop("trace", "T")
    cvt_subseq = params.pop("subseq", "S")
    # NOTE(review): cvt_accum is only defined when 'accum' is passed, but it
    # is still called below whenever a value ends with "A" -- an "A" value
    # without the 'accum' param would raise NameError. Confirm intended.
    if "accum" in params:
        if params["accum"] == True:
            # accum=True: strip the trailing "A" and keep the numeric part
            cvt_accum = lambda a: float(a[:-1])
        else:
            # otherwise replace accumulated values with the given constant
            cvt_accum = lambda a: params["accum"]
    p_dict, options = check_params(params)
    columns = make_labels(p_dict["elems"])
    raw = make_request(p_dict, options["multi"])

    if "error" in raw:
        raise TypeError(raw["error"])

    # Multi-station responses carry the date range in the request; single
    # station responses carry the date as the first element of each row,
    # hence the differing datum_slice offsets below.
    if options["multi"]:
        sdate = p_dict.get("sdate", p_dict["date"])
        if isinstance(sdate, (list, tuple)):
            sdate = "-".join(map(str, sdate))
        raw, datum_slice = raw["data"], slice(0, None)
    else:
        sdate = raw["data"][0][0]
        raw, datum_slice = [raw], slice(1, None)

    all_data, all_meta = {}, {}

    dates = None
    one_date = "one_date" in options
    for stn_raw in raw:
        stn_data = dict([(key, []) for key in columns])
        meta = stn_raw["meta"]
        # Station id: first token of the first sid entry.
        sid = meta["sids"][0].split(" ")[0]
        if one_date:
            raw_data = [stn_raw["data"]]
        else:
            raw_data = stn_raw["data"]
        # Date index is built once, from the first station's row count.
        if dates is None:
            dates = pd.date_range(sdate, periods=len(raw_data), freq=options["date_freq"])
        for datum in raw_data:
            for i, e in enumerate(datum[datum_slice]):
                # EAFP: most values are numeric; sentinels fall through to
                # the ValueError branch and get their configured replacement.
                try:
                    stn_data[columns[i]].append(float(e))
                except ValueError:
                    if e == "M":
                        stn_data[columns[i]].append(cvt_missing)
                    elif e == "T":
                        stn_data[columns[i]].append(cvt_trace)
                    elif e == "S":
                        stn_data[columns[i]].append(cvt_subseq)
                    elif e.endswith("A"):
                        stn_data[columns[i]].append(cvt_accum(e))
                    else:
                        stn_data[columns[i]].append(e)
        df = DataFrame(stn_data, index=dates)
        all_data[sid] = df
        all_meta[sid] = meta
    panel = Panel.from_dict(all_data)
    # Make a pd.DataFrame for meta
    # Indexed by first ID in sids. Should uid be used?
    sids = [k for k in all_meta]
    panel.meta = DataFrame([all_meta[k] for k in sids], index=sids)

    return panel
コード例 #21
0
 def __setitem__(self, key, val):
     """Insert or replace one instrument, rebuilding the backing Panel."""
     updated = dict(self.instruments)
     updated[key] = val
     self.instruments = Panel.from_dict(updated)
コード例 #22
0
ファイル: panel_ctor.py プロジェクト: TomAugspurger/pandas
 def time_from_dict(self):
     # Benchmark Panel.from_dict construction; warnings are captured so the
     # Panel deprecation warning does not pollute benchmark output.
     with warnings.catch_warnings(record=True):
         Panel.from_dict(self.data_frames)
コード例 #23
0
ファイル: panel_ctor.py プロジェクト: jerodestapa/pandas
 def time_from_dict(self):
     # Benchmark Panel.from_dict construction; warnings are captured so the
     # Panel deprecation warning does not pollute benchmark output.
     with warnings.catch_warnings(record=True):
         Panel.from_dict(self.data_frames)
コード例 #24
0
 def analyze(self):
     # Build a Panel from the sampled distributions and delegate plotting
     # and saving to the sampler plotter, alongside the population dict.
     sampler_plotter.save_sampler_distribution(Panel.from_dict(self.sampler_dic), pop_dic=self.pop_dic, title=self.title, file_name=self.file_name)
コード例 #25
0
ファイル: databot.py プロジェクト: Mark1988huang/ppQuanTrade
    def getQuotes(self, tickers, fields=Fields.QUOTES, index=None, **kwargs):
        '''
        @summary: retrieve google finance data asked while initializing
        and store it: Date, open, low, high, close, volume
        @param quotes: list of quotes to fetch
        @param fields: list of fields to store per quotes
        @param index: pandas.Index object, used for dataframes
        @param kwargs.start: date or datetime of the first values
               kwargs.end: date or datetime of the last value
               kwargs.delta: datetime.timedelta object, period of time to fill
               kwargs.save: save to database downloaded quotes
               kwargs.reverse: reverse companie name and field in panel
               kwargs.symbols
               kwargs.markets
        @return a panel/dataframe/timeserie like close = data['google']['close'][date]
        '''
        ''' ----------------------------------------------------------------------------'''
        ''' ----------------------------------  Index check and build  -----------------'''
        #FIXME reversed dataframe could be store in database ?
        df      = dict()
        save    = kwargs.get('save', False)
        reverse = kwargs.get('reverse', False)
        markets = kwargs.get('markets', None)
        symbols = kwargs.get('symbols', None)
        # Build a tz-aware DatetimeIndex if the caller did not supply one.
        if not isinstance(index, pd.DatetimeIndex):
            index = self._makeIndex(kwargs)
            if not isinstance(index, pd.DatetimeIndex):
                return None
        if not index.tzinfo:
            index = index.tz_localize(self.tz)
        assert (index.tzinfo)

        # Resolve ticker codes from the database, else fall back on kwargs.
        if self.connected['database']:
            symbols, markets = self.db.getTickersCodes(tickers)
        elif not symbols or not markets:
            self._logger.error('** No database neither informations provided')
            return None

        for ticker in tickers:
            if not ticker in symbols:
                self._logger.warning('No code availablefor {}, going on'.format(ticker))
                continue
            self._logger.info('Processing {} stock'.format(ticker))

            ''' ----------------------------------------------------------------------------'''
            ''' ----------------------------------------------  Database check  ------------'''
            # _inspectDB appears to return cached rows plus the still-missing
            # date range as the new 'index' -- confirm against _inspectDB.
            db_df, index = self._inspectDB(ticker, index, fields)
            assert (index.tzinfo)
            if not db_df.empty:
                assert (db_df.index.tzinfo)
                if index.size == 0:
                    # Fully served from cache: nothing to download or re-save.
                    save       = False
                    df[ticker] = db_df
                    continue

            ''' ----------------------------------------------------------------------------'''
            ''' ----------------------------------------------  Remote retrievers  ---------'''
            self._logger.info('Downloading missing data, from {} to {}'
                              .format(index[0], index[-1]))
            #FIXME No index.freq for comaprison?
            #if (index[1] - index[0]) < pd.datetools.timedelta(days=1):
            # Intraday (finer-than-business-day) frequency -> minutely feed.
            if index.freq > pd.datetools.BDay():
                self._logger.info('Fetching minutely quotes ({})'.format(index.freq))
                #TODO truncate in the method
                network_df = DataFrame(self.remote.getMinutelyQuotes(
                                       symbols[ticker], markets[ticker], index),
                                       columns=fields).truncate(after=index[-1])
            else:
                network_df = DataFrame(self.remote.getHistoricalQuotes(
                                       symbols[ticker], index), columns=fields)

            ''' ----------------------------------------------------------------------------'''
            ''' ----------------------------------------------  Merging  -------------------'''
            # Stitch cached and downloaded frames in chronological order.
            if not db_df.empty:
                self._logger.debug('Checking db index ({}) vs network index ({})'
                                   .format(db_df.index, network_df.index))
                if db_df.index[0] > network_df.index[0]:
                    df[ticker] = pd.concat([network_df, db_df])
                else:
                    df[ticker] = pd.concat([db_df, network_df]).sort_index()
            else:
                df[ticker] = network_df

        ''' ----------------------------------------------------------------------------'''
        ''' ----------------------------------------------  Manage final panel  --------'''
        data = Panel.from_dict(df, intersect=True)
        if save:
            #TODO: accumulation and compression of data issue, drop always true at the moment
            if self.connected['database']:
                self.db.updateStockDb(data, Fields.QUOTES, drop=True)
            else:
                self._logger.warning('! No database connection for saving.')
        if reverse:
            return Panel.from_dict(df, intersect=True, orient='minor')
        #NOTE if data used here, insert every FIELD.QUOTES columns
        #NOTE Only return Panel when one ticker and/or one field ?
        return Panel.from_dict(df, intersect=True)
コード例 #26
0
ファイル: window.py プロジェクト: cscanlin/pandas
def _flex_binary_moment(arg1, arg2, f, pairwise=False):
    """Apply the binary moment function `f` over two array-like arguments,
    dispatching on their types.

    Series/ndarray + Series/ndarray -> f on the aligned pair.
    DataFrame + DataFrame -> column-by-column (pairwise=False) or a Panel of
    all column pairs (pairwise=True). DataFrame + Series/ndarray -> f of each
    column against the second argument. Raises TypeError for other inputs.
    """
    from pandas import Series, DataFrame, Panel
    if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and
            isinstance(arg2, (np.ndarray, Series, DataFrame))):
        raise TypeError("arguments to moment function must be of type "
                        "np.ndarray/Series/DataFrame")

    if (isinstance(arg1, (np.ndarray, Series)) and
            isinstance(arg2, (np.ndarray, Series))):
        X, Y = _prep_binary(arg1, arg2)
        return f(X, Y)

    elif isinstance(arg1, DataFrame):

        def dataframe_from_int_dict(data, frame_template):
            # Results are keyed by integer column position; map the keys
            # back to the template's column labels.
            result = DataFrame(data, index=frame_template.index)
            if len(result.columns) > 0:
                result.columns = frame_template.columns[result.columns]
            return result

        results = {}
        if isinstance(arg2, DataFrame):
            if pairwise is False:
                if arg1 is arg2:
                    # special case in order to handle duplicate column names
                    for i, col in enumerate(arg1.columns):
                        results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                    return dataframe_from_int_dict(results, arg1)
                else:
                    if not arg1.columns.is_unique:
                        raise ValueError("'arg1' columns are not unique")
                    if not arg2.columns.is_unique:
                        raise ValueError("'arg2' columns are not unique")
                    # Outer-align, then zero-add each against the other so
                    # both frames share the same NaN mask.
                    X, Y = arg1.align(arg2, join='outer')
                    X = X + 0 * Y
                    Y = Y + 0 * X
                    res_columns = arg1.columns.union(arg2.columns)
                    for col in res_columns:
                        if col in X and col in Y:
                            results[col] = f(X[col], Y[col])
                    return DataFrame(results, index=X.index,
                                     columns=res_columns)
            elif pairwise is True:
                results = defaultdict(dict)
                for i, k1 in enumerate(arg1.columns):
                    for j, k2 in enumerate(arg2.columns):
                        if j < i and arg2 is arg1:
                            # Symmetric case
                            results[i][j] = results[j][i]
                        else:
                            results[i][j] = f(*_prep_binary(arg1.iloc[:, i],
                                                            arg2.iloc[:, j]))
                # Integer axis labels are mapped back to column labels.
                p = Panel.from_dict(results).swapaxes('items', 'major')
                if len(p.major_axis) > 0:
                    p.major_axis = arg1.columns[p.major_axis]
                if len(p.minor_axis) > 0:
                    p.minor_axis = arg2.columns[p.minor_axis]
                return p
            else:
                raise ValueError("'pairwise' is not True/False")
        else:
            results = {}
            for i, col in enumerate(arg1.columns):
                results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2))
            return dataframe_from_int_dict(results, arg1)

    else:
        # Swap so the DataFrame (arg2) is handled by the branch above.
        # NOTE(review): 'pairwise' is not forwarded here; arg2 can only be a
        # Series/ndarray at this point, so pairwise is presumably moot --
        # confirm.
        return _flex_binary_moment(arg2, arg1, f)
コード例 #27
0
ファイル: test_sparse.py プロジェクト: hammer/pandas
 def test_dense_to_sparse(self):
     # Converting a dense panel to sparse should yield SparseSeries columns.
     dense = Panel.from_dict(self.data_dict)
     sparse = dense.to_sparse()
     self.assert_(isinstance(sparse['ItemA']['A'], SparseSeries))
コード例 #28
0
ファイル: test_panel.py プロジェクト: shreeshbhat/pandas
 def test_dense_to_sparse(self):
     # to_sparse is deprecated, so the conversion must warn; the result
     # should still hold SparseSeries columns.
     dense = Panel.from_dict(self.data_dict)
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         sparse = dense.to_sparse()
     tm.assertIsInstance(sparse['ItemA']['A'], SparseSeries)
コード例 #29
0
ファイル: Analyzer.py プロジェクト: emrahcem/cons-python
    
class PlotCDF(Analyzer):

    def analyze(self, result_panel):
        print 'CDF will be plotted'
        
if __name__ == '__main__':
#DATAFRAME
#     df_plotter=DataFramePlotter() 
#     dii={'sample':{1:2,2:3,4:67,5:70},'orig':{1:20,2:30,4:60,5:80}}
#     df_plotter.plot_data(DataFrame.from_dict(dii), pdf=True)
  
#PANEL    
#     pnl_plotter=PanelPlotter() 
    dp={'minor1':{'item1':{1:2,2:3,4:67,5:70},'item2':{1:20,2:30,4:60,5:80}}, 'minor2':{'item1':{1:5,2:7,4:47,5:10},'item2':{1:2,2:3,4:6,5:8}}, 'minor3':{'item1':{1:32,2:31,4:7,5:30},'item2':{1:30,2:20,4:50,5:80}}}
    pnl=Panel.from_dict(dp, orient='minor')
#     print pnl
#     for item in pnl.items:
#         print pnl[item]
#         DataFramePlotter().plot_data(pnl[item], cumulative=True, title=item)

#PANEL4D
    dp4={'label1':pnl, 'label2':pnl}
    pnl4D=Panel4D(dp4)
    for label in pnl4D.labels:
        #print label
        pnl=pnl4D.ix[label]
        for item in pnl.items:
            #print item
            #print pnl4D.ix[label,item]
            
コード例 #30
0
    
class PlotCDF(Analyzer):

    def analyze(self, result_panel):
        print 'CDF will be plotted'
        
if __name__ == '__main__':
#DATAFRAME
#     df_plotter=DataFramePlotter() 
#     dii={'sample':{1:2,2:3,4:67,5:70},'orig':{1:20,2:30,4:60,5:80}}
#     df_plotter.plot_data(DataFrame.from_dict(dii), pdf=True)
  
#PANEL    
#     pnl_plotter=PanelPlotter() 
    dp={'minor1':{'item1':{1:2,2:3,4:67,5:70},'item2':{1:20,2:30,4:60,5:80}}, 'minor2':{'item1':{1:5,2:7,4:47,5:10},'item2':{1:2,2:3,4:6,5:8}}, 'minor3':{'item1':{1:32,2:31,4:7,5:30},'item2':{1:30,2:20,4:50,5:80}}}
    pnl=Panel.from_dict(dp, orient='minor')
#     print pnl
#     for item in pnl.items:
#         print pnl[item]
#         DataFramePlotter().plot_data(pnl[item], cumulative=True, title=item)

#PANEL4D
    dp4={'label1':pnl, 'label2':pnl}
    pnl4D=Panel4D(dp4)
    for label in pnl4D.labels:
        #print label
        pnl=pnl4D.ix[label]
        for item in pnl.items:
            #print item
            #print pnl4D.ix[label,item]
            
コード例 #31
0
 def test_to_dense(self):
     # Densifying the sparse panel should reproduce the source data.
     densified = self.panel.to_dense()
     expected = Panel.from_dict(self.data_dict)
     assert_panel_equal(densified, expected)
コード例 #32
0
 def test_dense_to_sparse(self):
     # Converting a dense panel to sparse should yield SparseSeries columns.
     dense = Panel.from_dict(self.data_dict)
     sparse = dense.to_sparse()
     tm.assertIsInstance(sparse['ItemA']['A'], SparseSeries)
コード例 #33
0
ファイル: window.py プロジェクト: novumrx/pandas
def _flex_binary_moment(arg1, arg2, f, pairwise=False):
    """Apply the binary moment function `f` to `arg1`/`arg2`, dispatching
    on their types.

    Parameters
    ----------
    arg1, arg2 : np.ndarray, Series or DataFrame
    f : callable
        Binary function applied to two aligned 1-d arguments.
    pairwise : bool, default False
        Only meaningful when both arguments are DataFrames: if True, apply
        `f` to every column pair and return a Panel.

    Returns
    -------
    Whatever `f` returns (both args 1-d), a DataFrame (column-wise cases),
    or a Panel (pairwise DataFrame case).

    Raises
    ------
    TypeError
        If either argument is not an ndarray/Series/DataFrame.
    ValueError
        If column labels are duplicated where uniqueness is required, or
        `pairwise` is not a bool.
    """
    # Local import — presumably to avoid a circular import at module load
    # time (TODO confirm against the module's top-level imports).
    from pandas import Series, DataFrame, Panel
    if not (isinstance(arg1, (np.ndarray, Series, DataFrame))
            and isinstance(arg2, (np.ndarray, Series, DataFrame))):
        raise TypeError("arguments to moment function must be of type "
                        "np.ndarray/Series/DataFrame")

    # Simplest case: two 1-d arguments -> align them and apply f directly.
    if (isinstance(arg1, (np.ndarray, Series))
            and isinstance(arg2, (np.ndarray, Series))):
        X, Y = _prep_binary(arg1, arg2)
        return f(X, Y)

    elif isinstance(arg1, DataFrame):

        # Helper: re-label an int-keyed result dict with the template
        # frame's column labels (the ints index into template.columns).
        def dataframe_from_int_dict(data, frame_template):
            result = DataFrame(data, index=frame_template.index)
            if len(result.columns) > 0:
                result.columns = frame_template.columns[result.columns]
            return result

        results = {}
        if isinstance(arg2, DataFrame):
            if pairwise is False:
                if arg1 is arg2:
                    # special case in order to handle duplicate column names
                    for i, col in enumerate(arg1.columns):
                        results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                    return dataframe_from_int_dict(results, arg1)
                else:
                    # Distinct frames: match columns by label, so labels
                    # must be unique on both sides.
                    if not arg1.columns.is_unique:
                        raise ValueError("'arg1' columns are not unique")
                    if not arg2.columns.is_unique:
                        raise ValueError("'arg2' columns are not unique")
                    X, Y = arg1.align(arg2, join='outer')
                    # Adding 0 * other after alignment makes each frame
                    # pick up the other's NaNs, so both share the same
                    # missing-value mask before f is applied.
                    X = X + 0 * Y
                    Y = Y + 0 * X
                    res_columns = arg1.columns.union(arg2.columns)
                    for col in res_columns:
                        if col in X and col in Y:
                            results[col] = f(X[col], Y[col])
                    return DataFrame(results,
                                     index=X.index,
                                     columns=res_columns)
            elif pairwise is True:
                # Every column of arg1 against every column of arg2;
                # int keys are mapped back to labels below.
                results = defaultdict(dict)
                for i, k1 in enumerate(arg1.columns):
                    for j, k2 in enumerate(arg2.columns):
                        if j < i and arg2 is arg1:
                            # Symmetric case
                            results[i][j] = results[j][i]
                        else:
                            results[i][j] = f(
                                *_prep_binary(arg1.iloc[:, i], arg2.iloc[:,
                                                                         j]))
                p = Panel.from_dict(results).swapaxes('items', 'major')
                if len(p.major_axis) > 0:
                    p.major_axis = arg1.columns[p.major_axis]
                if len(p.minor_axis) > 0:
                    p.minor_axis = arg2.columns[p.minor_axis]
                return p
            else:
                raise ValueError("'pairwise' is not True/False")
        else:
            # DataFrame vs 1-d argument: apply f column-wise.
            results = {}
            for i, col in enumerate(arg1.columns):
                results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2))
            return dataframe_from_int_dict(results, arg1)

    else:
        # arg1 is 1-d and arg2 is a DataFrame: swap so the DataFrame
        # handling above applies.
        # NOTE(review): `pairwise` is not forwarded in this recursive call,
        # so a pairwise request silently degrades to column-wise here —
        # confirm whether that is intentional for the Series-vs-DataFrame
        # case.
        return _flex_binary_moment(arg2, arg1, f)
コード例 #34
0
ファイル: column_panel.py プロジェクト: wenzi/trtools
    def _getitem_tuple(self, keys):
        """Select several keys at once and bundle the results into a Panel."""
        return Panel.from_dict({key: self[key] for key in keys})
コード例 #35
0
ファイル: test_panel.py プロジェクト: shreeshbhat/pandas
 def test_to_dense(self):
     """Densifying the sparse panel must reproduce the source data dict."""
     actual = self.panel.to_dense()
     tm.assert_panel_equal(actual, Panel.from_dict(self.data_dict))
コード例 #36
0
    def getQuotes(self, tickers, fields=Fields.QUOTES, index=None, **kwargs):
        '''
        @summary: retrieve google finance data asked while initializing
        and store it: Date, open, low, high, close, volume
        @param tickers: list of tickers to fetch
        @param fields: list of fields to store per quotes
        @param index: pandas.Index object, used for dataframes
        @param kwargs.start: date or datetime of the first values
               kwargs.end: date or datetime of the last value
               kwargs.delta: datetime.timedelta object, period of time to fill
               kwargs.save: save to database downloaded quotes
               kwargs.reverse: reverse company name and field in panel
               kwargs.symbols: ticker -> symbol mapping, used when no database
               kwargs.markets: ticker -> market mapping, used when no database
        @return a panel/dataframe/timeserie like close = data['google']['close'][date]
        '''
        # ------------------------------ Index check and build ----------------
        #FIXME reversed dataframe could be store in database ?
        df = dict()
        save = kwargs.get('save', False)
        reverse = kwargs.get('reverse', False)
        markets = kwargs.get('markets', None)
        symbols = kwargs.get('symbols', None)
        # Build an index from start/end/delta kwargs when none was supplied.
        if not isinstance(index, pd.DatetimeIndex):
            index = self._makeIndex(kwargs)
            if not isinstance(index, pd.DatetimeIndex):
                return None
        # Everything downstream assumes a timezone-aware index.
        if not index.tzinfo:
            index = index.tz_localize(self.tz)
        assert (index.tzinfo)

        if self.connected['database']:
            symbols, markets = self.db.getTickersCodes(tickers)
        elif not symbols or not markets:
            self._logger.error('** No database neither informations provided')
            return None

        for ticker in tickers:
            # `ticker not in` replaces the `not ticker in` anti-idiom.
            if ticker not in symbols:
                # BUG FIX: log message was missing the space in
                # 'availablefor'.
                self._logger.warning(
                    'No code available for {}, going on'.format(ticker))
                continue
            self._logger.info('Processing {} stock'.format(ticker))
            # ------------------------------ Database check -------------------
            # _inspectDB returns cached data plus the index range still
            # missing from the cache.
            db_df, index = self._inspectDB(ticker, index, fields)
            assert (index.tzinfo)
            if not db_df.empty:
                assert (db_df.index.tzinfo)
                if index.size == 0:
                    # Fully cached: nothing left to download or save.
                    save = False
                    df[ticker] = db_df
                    continue
            # ------------------------------ Remote retrievers ----------------
            self._logger.info('Downloading missing data, from {} to {}'.format(
                index[0], index[-1]))
            #FIXME No index.freq for comaprison?
            #if (index[1] - index[0]) < pd.datetools.timedelta(days=1):
            if index.freq > pd.datetools.BDay():
                self._logger.info('Fetching minutely quotes ({})'.format(
                    index.freq))
                #TODO truncate in the method
                network_df = DataFrame(
                    self.remote.getMinutelyQuotes(symbols[ticker],
                                                  markets[ticker], index),
                    columns=fields).truncate(after=index[-1])
            else:
                network_df = DataFrame(self.remote.getHistoricalQuotes(
                    symbols[ticker], index),
                                       columns=fields)
            # ------------------------------ Merging ---------------------------
            if not db_df.empty:
                self._logger.debug(
                    'Checking db index ({}) vs network index ({})'.format(
                        db_df.index, network_df.index))
                # Keep chronological order when concatenating cache + download.
                if db_df.index[0] > network_df.index[0]:
                    df[ticker] = pd.concat([network_df, db_df])
                else:
                    df[ticker] = pd.concat([db_df, network_df]).sort_index()
            else:
                df[ticker] = network_df
        # ------------------------------ Manage final panel --------------------
        data = Panel.from_dict(df, intersect=True)
        if save:
            #TODO: accumulation and compression of data issue, drop always true at the moment
            if self.connected['database']:
                self.db.updateStockDb(data, Fields.QUOTES, drop=True)
            else:
                self._logger.warning('! No database connection for saving.')
        if reverse:
            return Panel.from_dict(df, intersect=True, orient='minor')
        #NOTE if data used here, insert every FIELD.QUOTES columns
        #NOTE Only return Panel when one ticker and/or one field ?
        return Panel.from_dict(df, intersect=True)
コード例 #37
0
 def __setitem__(self, key, val):
     """Insert or replace one instrument, rebuilding the backing Panel."""
     updated = dict(self.instruments)
     updated[key] = val
     self.instruments = Panel.from_dict(updated)
コード例 #38
0
    def read(
        self,
        metrics,
        start=None,
        end=None,
        create_multiindex=True,
        remove_duplicate_indices=True,
    ):
        """ read the data from Graphite

        Arguments:
            metrics (str, list or dict): the metric(s) you want to look up.
                A string fetches one metric, a list concatenates several
                metrics column-wise, and a dict of ``label -> metric``
                builds a Panel (experimental, see warning below).
            start (string): the starting date timestamp.  All Graphite
                datestrings are allowed; defaults to the instance-level
                ``_from``.
            end (string): the ending date timestamp, same format as start;
                defaults to the instance-level ``_until``.
            create_multiindex (bool): when True, post-process each frame
                with ``self._create_multiindex`` (default True)
            remove_duplicate_indices (bool): forwarded to
                ``self._create_multiindex`` (default True)

        returns:
            a pandas DataFrame or Panel with the requested Data from Graphite

        raises:
            GraphiteDataError: if no base URL was configured
            TypeError: if ``metrics`` is not a str, list or dict
        """
        # sanity checks
        if not self.url:
            raise GraphiteDataError('No URL specified')
        else:
            url = urlparse.urljoin(self.url, self._render_api)

        # Fall back to the instance-wide default time range.
        if start is None:
            start = self._from

        if end is None:
            end = self._until

        if isinstance(metrics, string_types):
            # Single metric -> single DataFrame.
            df = self._download_single_metric(url, metrics, start, end)
            if create_multiindex:
                self._create_multiindex(df, remove_duplicate_indices)
        elif isinstance(metrics, list):
            # List of metrics -> one column-concatenated DataFrame.
            dfs = []
            for metric in metrics:
                dfs.append(
                    self._download_single_metric(url, metric, start, end))
            df = concat(dfs, axis=1)
            if create_multiindex:
                self._create_multiindex(df, remove_duplicate_indices)
        elif isinstance(metrics, dict):
            warnings.warn(
                'To create a Panel from a dict of metric is a '
                'experimental feature. So don\'t use this in '
                'production! Because the resulting object may be '
                'changed in the future or the feature may be removed.')
            dfs = {}
            for label, metric in metrics.items():
                dfs[label] = self._download_single_metric(
                    url, metric, start, end)
                if create_multiindex:
                    self._create_multiindex(dfs[label],
                                            remove_duplicate_indices)
            df = Panel.from_dict(dfs)
        else:
            # BUG FIX: the original message misnamed the parameter and
            # omitted dict, which the branch above explicitly accepts.
            raise TypeError('metrics has to be of type str, list or dict')

        return df
コード例 #39
0
ファイル: column_panel.py プロジェクト: milktrader/trtools
    def _getitem_tuple(self, keys):
        """Fetch each requested key and assemble the results into a Panel."""
        selected = dict((key, self[key]) for key in keys)
        return Panel.from_dict(selected)