def test_panel_join_many(self):
    """Joining a panel against several others reproduces the original."""
    tm.K = 10
    panel = tm.makePanel()
    tm.K = 4

    # Splitting on items and re-joining must round-trip to the full panel.
    pieces = [panel.ix[:2], panel.ix[2:6], panel.ix[6:]]
    result = pieces[0].join(pieces[1:])
    tm.assert_panel_equal(result, panel)

    # Overlapping major-axis slices: inner/outer joins must match from_dict.
    pieces = [panel.ix[:2, :-5], panel.ix[2:6, 2:], panel.ix[6:, 5:-7]]
    merged = {}
    for piece in pieces:
        merged.update(piece.iteritems())

    result = pieces[0].join(pieces[1:], how='inner')
    tm.assert_panel_equal(result, Panel.from_dict(merged, intersect=True))

    result = pieces[0].join(pieces[1:], how='outer')
    tm.assert_panel_equal(result, Panel.from_dict(merged, intersect=False))

    # edge cases: suffixes and right-joins are rejected for multi-joins
    self.assertRaises(ValueError, pieces[0].join, pieces[1:],
                      how='outer', lsuffix='foo', rsuffix='bar')
    self.assertRaises(ValueError, pieces[0].join, pieces[1:], how='right')
def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
    '''
    Return a quote panel built from local Yahoo CSV data.

    @param tickers: iterable of ticker names to load
    @param index: pandas index; its first/last entries bound the date range
                  and its freq is used to resample the raw series
    @param fields: quote fields to extract for each ticker
    @param kwargs.reverse: orient the returned panel with fields as items
           kwargs.verbose: forwarded to the CSV reader
           kwargs.symbols: fallback ticker->symbol mapping when no database
    @return: pandas.Panel of quotes (intersection of indices), or None on error
    '''
    #TODO Replace adj_close with actual_close
    #TODO Add reindex methods, and start, end, delta parameters
    reverse = kwargs.get('reverse', False)
    verbose = kwargs.get('verbose', False)
    # BUGFIX: 'symbols' was previously unbound when no database connection
    # existed, so the 'elif not symbols' branch raised NameError instead of
    # logging the error. Accept it from kwargs, as getQuotes() does.
    symbols = kwargs.get('symbols', None)
    if self.connected['database']:
        symbols, markets = self.db.getTickersCodes(tickers)
    elif not symbols:
        self._logger.error('** No database neither informations provided')
        return None
    # NYSE trading days between the requested bounds (4pm close)
    timestamps = du.getNYSEdays(index[0], index[-1], dt.timedelta(hours=16))
    csv = da.DataAccess('Yahoo')
    df = csv.get_data(timestamps, symbols.values(), fields, verbose=verbose)
    quotes_dict = dict()
    for ticker in tickers:
        quotes_dict[ticker] = dict()
        # df is one frame per requested field, in the same order as 'fields'
        for j, field in enumerate(fields):
            serie = df[j][symbols[ticker]].groupby(
                index.freq.rollforward).aggregate(np.mean)
            #TODO add a function parameter to decide what to do about it
            # Forward-fill gaps (e.g. non-trading days) by default.
            clean_serie = serie.fillna(method='pad')
            quotes_dict[ticker][field] = clean_serie
    if reverse:
        return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
    return Panel.from_dict(quotes_dict, intersect=True)
def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
    '''
    Return a quote panel built from local Yahoo CSV data.

    @param tickers: iterable of ticker names to load
    @param index: pandas index; its first/last entries bound the date range
                  and its freq is used to resample the raw series
    @param fields: quote fields to extract for each ticker
    @param kwargs.reverse: orient the returned panel with fields as items
           kwargs.verbose: forwarded to the CSV reader
           kwargs.symbols: fallback ticker->symbol mapping when no database
    @return: pandas.Panel of quotes (intersection of indices), or None on error
    '''
    #TODO Replace adj_close with actual_close
    #TODO Add reindex methods, and start, end, delta parameters
    reverse = kwargs.get('reverse', False)
    verbose = kwargs.get('verbose', False)
    # BUGFIX: 'symbols' was previously unbound when no database connection
    # existed, so the 'elif not symbols' branch raised NameError instead of
    # logging the error. Accept it from kwargs, as getQuotes() does.
    symbols = kwargs.get('symbols', None)
    if self.connected['database']:
        symbols, markets = self.db.getTickersCodes(tickers)
    elif not symbols:
        self._logger.error('** No database neither informations provided')
        return None
    # NYSE trading days between the requested bounds (4pm close)
    timestamps = du.getNYSEdays(index[0], index[-1], dt.timedelta(hours=16))
    csv = da.DataAccess('Yahoo')
    df = csv.get_data(timestamps, symbols.values(), fields, verbose=verbose)
    quotes_dict = dict()
    for ticker in tickers:
        quotes_dict[ticker] = dict()
        # df is one frame per requested field, in the same order as 'fields'
        for j, field in enumerate(fields):
            serie = df[j][symbols[ticker]].groupby(
                index.freq.rollforward).aggregate(np.mean)
            #TODO add a function parameter to decide what to do about it
            # Forward-fill gaps (e.g. non-trading days) by default.
            clean_serie = serie.fillna(method='pad')
            quotes_dict[ticker][field] = clean_serie
    if reverse:
        return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
    return Panel.from_dict(quotes_dict, intersect=True)
def get_divergence_scores_panel(self, divergence=divergence_metrics.JensenShannonDivergence):
    """Score every (experiment, query, feature) cell against the population.

    For each cell of the sampler panel, computes divergence between the
    sampled distribution and the population distribution for that feature.

    :param divergence: object exposing compute(H, K); defaults to
                       Jensen-Shannon divergence.
    :return: Panel of divergence scores, oriented so features become items
             (orient='minor').
    """
    exp_panel = Panel.from_dict(self.sampler_dic)
    exp_dic = {}
    for exp_no in exp_panel.items:
        q_dic = {}
        for query in exp_panel.major_axis:
            f_dic = {}
            for feature in exp_panel.minor_axis:
                # K: sampled distribution for this experiment/query/feature
                K = exp_panel.ix[exp_no, query, feature]
                # H: population (reference) distribution for the feature
                H = self.pop_dic[feature]  #orig_panel.ix[0,major,minor]
                f_dic[feature] = divergence.compute(H, K)
            q_dic[query] = Series(f_dic)
        exp_dic[exp_no] = DataFrame(q_dic)
    return Panel.from_dict(exp_dic, orient='minor')
def rolling_corr_pairwise(df, window, min_periods=None):
    """
    Computes pairwise rolling correlation matrices as Panel whose items are
    dates.

    Parameters
    ----------
    df : DataFrame
    window : int
    min_periods : int, default None

    Returns
    -------
    correls : Panel
    """
    from pandas import Panel
    from collections import defaultdict

    correls = defaultdict(dict)
    columns = df.columns
    for i, left in enumerate(columns):
        # Only compute the upper triangle; correlation is symmetric.
        for right in columns[i:]:
            pair_corr = rolling_corr(df[left], df[right], window,
                                     min_periods=min_periods)
            correls[left][right] = pair_corr
            correls[right][left] = pair_corr

    return Panel.from_dict(correls).swapaxes('items', 'major')
def primary_panel(self, minor_axis=None, prefix=None):
    """ Returns primary as a Panel if possible, if fails, raises warning
    and returns as dict.

    :param minor_axis: name of an inputs key (string) whose values relabel
                       the panel's minor axis, or an int (no relabeling).
    :param prefix: optional string prepended to each relabeled value.
    :raises SimParserError: when minor_axis is neither a string nor an int.
    """
    primary_of_df = {}  #Create a primary of dataframes, so has to convert all values to DF's
    ignoring = []  # If can't convert a value to df, let user know
    try:
        wavelengths = self.static[globalparms.spectralparameters]['lambdas']
    except Exception:
        logging.warning('Could not find lambdas in self.static, primary panel will'
                        ' not be indexed by wavelength...')
        wavelengths = None
    # Try to convert to dataframes. If fails on one step, should fail on all the steps
    for step, data in self.primary.items():
        primary_of_df[step] = DataFrame(data, index=wavelengths)
    # Panel with as simulation variabless as major axis (ie A_avg, R_0)
    outpanel = Panel.from_dict(primary_of_df, orient='minor')
    # Sort Items alphabetically (R_avg, R_0, R_1, T_avg, ...)
    outpanel = outpanel.reindex_axis(sorted(outpanel.items))  #<-- Sorted items alphabetically
    # Sort Minor axis with integer suffix (step_0, step_1, step_2)
    # http://stackoverflow.com/questions/4287209/sort-list-of-strings-by-integer-suffix-in-python
    outpanel = outpanel.reindex_axis(putil.stepsort(outpanel.minor_axis),
                                     axis=2,  #items axis
                                     copy=False)  #Save memory

    # REORIENTATION OF MINOR AXIS LABELS
    if minor_axis:
        if isinstance(minor_axis, basestring):
            inputarray = self.inputs[minor_axis]  # values like 50, 60, 70, so want prefix/?
            newaxis = dict(zip(outpanel.minor_axis, inputarray))
            # end of day, want basically {'step_1':'vfrac_0.5, 'step_2', 'vfrac_0.10' ...
            if prefix:
                # No delimiter (ie %s_%s) because prefix can set that eg prefix = layerd_ or layerd=
                newaxis = dict((k, '%s%.4f' % (prefix, v)) for k, v in newaxis.items())
        elif isinstance(minor_axis, int):
            # NOTE(review): on this path 'newaxis' is never assigned, so the
            # rename() below would raise NameError — confirm intended behavior.
            pass
        else:
            # BUGFIX: was '% (type(minor_axis, self.inputs))', which calls
            # type() with two arguments and raises TypeError instead of
            # formatting the intended SimParserError message.
            raise SimParserError('Can only map strings or integers to primary_panel, get type %s.'
                                 ' These should correspond to the keys in %s'
                                 % (type(minor_axis), self.inputs))
        outpanel = outpanel.rename(minor_axis=newaxis)
    return outpanel
def apply(self, func, *args, **kwargs):
    """Apply ``func`` group-wise to each item DataFrame, returning a Panel.

    :param func: either the name of a GroupBy method (string) or a callable
                 applied to each group.
    :param args, kwargs: forwarded to the method / callable.
    :return: Panel keyed by the original items.
    """
    result = {}
    for key, df in self.obj.iteritems():
        grp = DataFrameGroupBy(df, grouper=self.grouper)
        if callable(func):
            # BUGFIX: the original only bound ``f`` when ``func`` was a
            # method name, so passing an actual callable raised NameError.
            # Callables are dispatched through GroupBy.apply instead.
            res = grp.apply(func, *args, **kwargs)
        else:
            res = getattr(grp, func)(*args, **kwargs)
        result[key] = res
    return Panel.from_dict(result)
def _check(frame):
    # Materializing the dense frame exercises to_dense() on the fixture.
    dense_frame = frame.to_dense()

    # Stacking via a one-item Panel must agree with stack_sparse_frame.
    panel = Panel.from_dict({"foo": frame})
    stacked_via_panel = panel.to_frame()
    stacked_via_sparse = spf.stack_sparse_frame(frame)

    self.assert_(np.array_equal(stacked_via_panel.values,
                                stacked_via_sparse.values))
def _check(frame):
    # Materializing the dense frame exercises to_dense() on the fixture.
    dense_frame = frame.to_dense()

    # Stacking via a one-item Panel must agree with stack_sparse_frame.
    panel = Panel.from_dict({'foo': frame})
    stacked_via_panel = panel.to_frame()
    stacked_via_sparse = spf.stack_sparse_frame(frame)

    self.assert_(np.array_equal(stacked_via_panel.values,
                                stacked_via_sparse.values))
def testFamaMacBethRolling(self):
    """Rolling/expanding Fama-MacBeth on a synthetic random panel."""
    # self.checkFamaMacBethExtended('rolling', self.panel_x, self.panel_y,
    #                               nw_lags_beta=2)
    base = DataFrame(np.random.randn(50, 10))
    x = Panel.from_dict({key: base for key in 'abcdefg'})
    # y is x's frame plus small noise so the regression is well-posed.
    y = base + DataFrame(0.01 * np.random.randn(50, 10))
    for window_type in ('rolling', 'expanding'):
        self.checkFamaMacBethExtended(window_type, x, y, nw_lags_beta=2)
def _check(frame):
    # Exercise to_dense() on the fixture; result itself is not inspected.
    dense_frame = frame.to_dense()  # noqa

    # Stacking via a one-item Panel must agree with stack_sparse_frame.
    panel = Panel.from_dict({'foo': frame})
    stacked_via_panel = panel.to_frame()
    stacked_via_sparse = spf.stack_sparse_frame(frame)

    self.assert_numpy_array_equal(stacked_via_panel.values,
                                  stacked_via_sparse.values)
def _check(frame):
    # Exercise to_dense() on the fixture; result itself is not inspected.
    dense_frame = frame.to_dense()  # noqa

    # Stacking via a one-item Panel must agree with stack_sparse_frame.
    panel = Panel.from_dict({'foo': frame})
    stacked_via_panel = panel.to_frame()
    stacked_via_sparse = spf.stack_sparse_frame(frame)

    tm.assert_numpy_array_equal(stacked_via_panel.values,
                                stacked_via_sparse.values)
def testFamaMacBethRolling(self):
    """Rolling/expanding Fama-MacBeth on independent random frames."""
    # self.checkFamaMacBethExtended('rolling', self.panel_x, self.panel_y,
    #                               nw_lags_beta=2)
    # df = DataFrame(np.random.randn(50, 10))
    x = Panel.from_dict({key: DataFrame(np.random.randn(50, 10))
                         for key in "abcdefg"})
    y = (DataFrame(np.random.randn(50, 10)) +
         DataFrame(0.01 * np.random.randn(50, 10)))
    for window_type in ("rolling", "expanding"):
        self.checkFamaMacBethExtended(window_type, x, y, nw_lags_beta=2)
def analyze(self):
    """Average sampled distributions per query/feature and plot PDF/CDFs.

    Builds a Panel from self.sampler_dic, transposes it so queries become
    items, averages the list-of-dict distributions for every feature, and
    saves one plot per query (showing only the last one interactively).
    """
    panel = Panel.from_dict(self.sampler_dic)
    # Swap the first two axes so each query is addressable as an item.
    t_panel = panel.transpose(1, 0, 2)
    num_of_queries = len(t_panel.items)
    for i, query in enumerate(t_panel.items):
        dic = {}
        #for (feature,distribution) in t_panel[query].apply(dict_ops.avg_list_of_dictionaries,axis=0, reduce=False).iteritems():
        #    dic[feature]=distribution
        for feature in t_panel[query].columns:
            # Each column holds a list of dicts (one per experiment);
            # average them into a single distribution per feature.
            dic[feature] = dict_ops.avg_list_of_dictionaries(t_panel[query][feature].tolist())
        # Output file is '<base>_<query>.<ext>' derived from self.file_name.
        # show=True only for the final query, so at most one window pops up.
        descriptive_stats_plotter.save_pdf_cdf_plot_for_a_single_graph(
            dic,
            self.title + '\n(' + query + ')',
            statistics_included=True,
            file_name=self.file_name.rsplit('.', 1)[0] + '_' + query + '.' + self.file_name.rsplit('.', 1)[1],
            show=True if i == num_of_queries - 1 else False)
def primary_panel(self, minor_axis=None, prefix=None):
    """ Returns primary as a Panel if possible, if fails, raises warning and
    returns as dict.

    :param minor_axis: name of an inputs key (string) whose values relabel
                       the panel's minor axis, or an int (no relabeling).
    :param prefix: optional string prepended to each relabeled value.
    :raises SimParserError: when minor_axis is neither a string nor an int.
    """
    primary_of_df = {}  #Create a primary of dataframes, so has to convert all values to DF's
    ignoring = []  # If can't convert a value to df, let user know
    try:
        wavelengths = self.static[globalparms.spectralparameters]['lambdas']
    except Exception:
        logging.warning('Could not find lambdas in self.static, primary panel will'
                        ' not be indexed by wavelength...')
        wavelengths = None
    # Try to convert to dataframes. If fails on one step, should fail on all the steps
    for step, data in self.primary.items():
        primary_of_df[step] = DataFrame(data, index=wavelengths)
    # Panel with as simulation variabless as major axis (ie A_avg, R_0)
    outpanel = Panel.from_dict(primary_of_df, orient='minor')
    # Sort Items alphabetically (R_avg, R_0, R_1, T_avg, ...)
    outpanel = outpanel.reindex_axis(sorted(outpanel.items))  #<-- Sorted items alphabetically
    # Sort Minor axis with integer suffix (step_0, step_1, step_2)
    # http://stackoverflow.com/questions/4287209/sort-list-of-strings-by-integer-suffix-in-python
    outpanel = outpanel.reindex_axis(putil.stepsort(outpanel.minor_axis),
                                     axis=2,  #items axis
                                     copy=False)  #Save memory

    # REORIENTATION OF MINOR AXIS LABELS
    if minor_axis:
        if isinstance(minor_axis, basestring):
            inputarray = self.inputs[minor_axis]  # values like 50, 60, 70, so want prefix/?
            newaxis = dict(zip(outpanel.minor_axis, inputarray))
            # end of day, want basically {'step_1':'vfrac_0.5, 'step_2', 'vfrac_0.10' ...
            if prefix:
                # No delimiter (ie %s_%s) because prefix can set that eg prefix = layerd_ or layerd=
                newaxis = dict((k, '%s%.4f' % (prefix, v)) for k, v in newaxis.items())
        elif isinstance(minor_axis, int):
            # NOTE(review): on this path 'newaxis' is never assigned, so the
            # rename() below would raise NameError — confirm intended behavior.
            pass
        else:
            # BUGFIX: was '% (type(minor_axis, self.inputs))', which calls
            # type() with two arguments and raises TypeError instead of
            # formatting the intended SimParserError message.
            raise SimParserError('Can only map strings or integers to primary_panel, get type %s.'
                                 ' These should correspond to the keys in %s'
                                 % (type(minor_axis), self.inputs))
        outpanel = outpanel.rename(minor_axis=newaxis)
    return outpanel
def ACISLoader(**params):
    """Fetch ACIS climate data and return it as a Panel keyed by station id.

    Special value codes in the raw feed are substituted via the popped
    ``missing``/``trace``/``subseq`` params (defaults "M"/"T"/"S");
    accumulated values ("...A") are converted per the ``accum`` param.
    The returned Panel carries station metadata in a ``meta`` DataFrame
    attribute indexed by the first station id in ``sids``.
    """
    # validate params
    # validate elems
    # calculate timeseries
    # Replacement values for the sentinel codes in the raw data.
    cvt_missing = params.pop("missing", "M")
    cvt_trace = params.pop("trace", "T")
    cvt_subseq = params.pop("subseq", "S")
    if "accum" in params:
        if params["accum"] == True:
            # accum=True: strip the trailing "A" and parse the number.
            cvt_accum = lambda a: float(a[:-1])
        else:
            # Otherwise substitute the provided accum value verbatim.
            cvt_accum = lambda a: params["accum"]
    # NOTE(review): if "accum" is absent but a datum ends with "A",
    # cvt_accum below is unbound and raises NameError — confirm inputs.
    p_dict, options = check_params(params)
    columns = make_labels(p_dict["elems"])
    raw = make_request(p_dict, options["multi"])
    if "error" in raw:
        raise TypeError(raw["error"])
    if options["multi"]:
        # Multi-station response: start date comes from the request params.
        sdate = p_dict.get("sdate", p_dict["date"])
        if isinstance(sdate, (list, tuple)):
            sdate = "-".join(map(str, sdate))
        raw, datum_slice = raw["data"], slice(0, None)
    else:
        # Single-station response: each datum's first element is the date,
        # so slice it off when collecting values.
        sdate = raw["data"][0][0]
        raw, datum_slice = [raw], slice(1, None)
    all_data, all_meta = {}, {}
    dates = None
    one_date = "one_date" in options
    for stn_raw in raw:
        stn_data = dict([(key, []) for key in columns])
        meta = stn_raw["meta"]
        # Station id: first token of the first entry in "sids".
        sid = meta["sids"][0].split(" ")[0]
        if one_date:
            raw_data = [stn_raw["data"]]
        else:
            raw_data = stn_raw["data"]
        if dates is None:
            # Date index shared by every station, built from the first one.
            dates = pd.date_range(sdate, periods=len(raw_data),
                                  freq=options["date_freq"])
        for datum in raw_data:
            for i, e in enumerate(datum[datum_slice]):
                try:
                    stn_data[columns[i]].append(float(e))
                except ValueError:
                    # Non-numeric sentinel codes from the feed.
                    if e == "M":
                        stn_data[columns[i]].append(cvt_missing)
                    elif e == "T":
                        stn_data[columns[i]].append(cvt_trace)
                    elif e == "S":
                        stn_data[columns[i]].append(cvt_subseq)
                    elif e.endswith("A"):
                        stn_data[columns[i]].append(cvt_accum(e))
                    else:
                        stn_data[columns[i]].append(e)
        df = DataFrame(stn_data, index=dates)
        all_data[sid] = df
        all_meta[sid] = meta
    panel = Panel.from_dict(all_data)
    # Make a pd.DataFrame for meta
    # Indexed by first ID in sids. Should uid be used?
    sids = [k for k in all_meta]
    panel.meta = DataFrame([all_meta[k] for k in sids], index=sids)
    return panel
def __setitem__(self, key, val):
    # Rebuild the instruments Panel with the new/updated entry, since a
    # Panel's items cannot be assigned in place through this container.
    updated = dict(self.instruments)
    updated[key] = val
    self.instruments = Panel.from_dict(updated)
def time_from_dict(self):
    # Benchmark body: time Panel construction from the prepared dict of
    # DataFrames. catch_warnings(record=True) captures any warnings emitted
    # during construction (presumably Panel's deprecation FutureWarning —
    # confirm) so they don't pollute the benchmark output.
    with warnings.catch_warnings(record=True):
        Panel.from_dict(self.data_frames)
def analyze(self):
    # Assemble the sampler results into a Panel, then hand everything to
    # the plotter alongside the population distributions.
    sampler_panel = Panel.from_dict(self.sampler_dic)
    sampler_plotter.save_sampler_distribution(sampler_panel,
                                              pop_dic=self.pop_dic,
                                              title=self.title,
                                              file_name=self.file_name)
def getQuotes(self, tickers, fields=Fields.QUOTES, index=None, **kwargs):
    '''
    @summary: retrieve google finance data asked while initializing
    and store it: Date, open, low, high, close, volume
    @param quotes: list of quotes to fetch
    @param fields: list of fields to store per quotes
    @param index: pandas.Index object, used for dataframes
    @param kwargs.start: date or datetime of the first values
           kwargs.end: date or datetime of the last value
           kwargs.delta: datetime.timedelta object, period of time to fill
           kwargs.save: save to database downloaded quotes
           kwargs.reverse: reverse companie name and field in panel
           kwargs.symbols
           kwargs.markets
    @return a panel/dataframe/timeserie like close = data['google']['close'][date]
    '''
    ''' ----------------------------------------------------------------------------'''
    ''' ---------------------------------- Index check and build -----------------'''
    #FIXME reversed dataframe could be store in database ?
    df = dict()
    save = kwargs.get('save', False)
    reverse = kwargs.get('reverse', False)
    markets = kwargs.get('markets', None)
    symbols = kwargs.get('symbols', None)
    # Build an index from start/end/delta kwargs when none was supplied.
    if not isinstance(index, pd.DatetimeIndex):
        index = self._makeIndex(kwargs)
        if not isinstance(index, pd.DatetimeIndex):
            return None
    # All downstream date handling assumes a timezone-aware index.
    if not index.tzinfo:
        index = index.tz_localize(self.tz)
    assert (index.tzinfo)
    # Symbol/market codes: database takes precedence over kwargs.
    if self.connected['database']:
        symbols, markets = self.db.getTickersCodes(tickers)
    elif not symbols or not markets:
        self._logger.error('** No database neither informations provided')
        return None
    for ticker in tickers:
        if not ticker in symbols:
            self._logger.warning('No code availablefor {}, going on'.format(ticker))
            continue
        self._logger.info('Processing {} stock'.format(ticker))
        ''' ----------------------------------------------------------------------------'''
        ''' ---------------------------------------------- Database check ------------'''
        # _inspectDB returns already-stored quotes and shrinks the index to
        # the span still missing from the database.
        db_df, index = self._inspectDB(ticker, index, fields)
        assert (index.tzinfo)
        if not db_df.empty:
            assert (db_df.index.tzinfo)
        if index.size == 0:
            # Everything was already in the database; nothing to save.
            save = False
            df[ticker] = db_df
            continue
        ''' ----------------------------------------------------------------------------'''
        ''' ---------------------------------------------- Remote retrievers ---------'''
        self._logger.info('Downloading missing data, from {} to {}'
                          .format(index[0], index[-1]))
        #FIXME No index.freq for comaprison?
        #if (index[1] - index[0]) < pd.datetools.timedelta(days=1):
        # Frequencies finer than one business day fetch minutely quotes.
        if index.freq > pd.datetools.BDay():
            self._logger.info('Fetching minutely quotes ({})'.format(index.freq))
            #TODO truncate in the method
            network_df = DataFrame(self.remote.getMinutelyQuotes(
                symbols[ticker], markets[ticker], index),
                columns=fields).truncate(after=index[-1])
        else:
            network_df = DataFrame(self.remote.getHistoricalQuotes(
                symbols[ticker], index), columns=fields)
        ''' ----------------------------------------------------------------------------'''
        ''' ---------------------------------------------- Merging -------------------'''
        # Concatenate database and freshly-downloaded data in date order.
        if not db_df.empty:
            self._logger.debug('Checking db index ({}) vs network index ({})'
                               .format(db_df.index, network_df.index))
            if db_df.index[0] > network_df.index[0]:
                df[ticker] = pd.concat([network_df, db_df])
            else:
                df[ticker] = pd.concat([db_df, network_df]).sort_index()
        else:
            df[ticker] = network_df
    ''' ----------------------------------------------------------------------------'''
    ''' ---------------------------------------------- Manage final panel --------'''
    data = Panel.from_dict(df, intersect=True)
    if save:
        #TODO: accumulation and compression of data issue, drop always true at the moment
        if self.connected['database']:
            self.db.updateStockDb(data, Fields.QUOTES, drop=True)
        else:
            self._logger.warning('! No database connection for saving.')
    if reverse:
        return Panel.from_dict(df, intersect=True, orient='minor')
    #NOTE if data used here, insert every FIELD.QUOTES columns
    #NOTE Only return Panel when one ticker and/or one field ?
    return Panel.from_dict(df, intersect=True)
def _flex_binary_moment(arg1, arg2, f, pairwise=False):
    """Dispatch a binary moment function ``f`` over ndarray/Series/DataFrame
    operands, returning a Series, DataFrame, or (pairwise) Panel.

    :param f: binary callable applied to aligned 1-d operands.
    :param pairwise: with two DataFrames, True computes every column pair
                     into a Panel; False matches columns by label.
    :raises TypeError: when either argument is not ndarray/Series/DataFrame.
    :raises ValueError: on non-unique columns or bad ``pairwise`` value.
    """
    from pandas import Series, DataFrame, Panel
    if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and
            isinstance(arg2, (np.ndarray, Series, DataFrame))):
        raise TypeError("arguments to moment function must be of type "
                        "np.ndarray/Series/DataFrame")
    if (isinstance(arg1, (np.ndarray, Series)) and
            isinstance(arg2, (np.ndarray, Series))):
        # Simple case: two 1-d operands -> apply f directly after alignment.
        X, Y = _prep_binary(arg1, arg2)
        return f(X, Y)
    elif isinstance(arg1, DataFrame):
        def dataframe_from_int_dict(data, frame_template):
            # Results are keyed by integer position; map them back to the
            # template's column labels.
            result = DataFrame(data, index=frame_template.index)
            if len(result.columns) > 0:
                result.columns = frame_template.columns[result.columns]
            return result

        results = {}
        if isinstance(arg2, DataFrame):
            if pairwise is False:
                if arg1 is arg2:
                    # special case in order to handle duplicate column names
                    for i, col in enumerate(arg1.columns):
                        results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                    return dataframe_from_int_dict(results, arg1)
                else:
                    if not arg1.columns.is_unique:
                        raise ValueError("'arg1' columns are not unique")
                    if not arg2.columns.is_unique:
                        raise ValueError("'arg2' columns are not unique")
                    # Outer-align, then cross-propagate NaNs so both sides
                    # share missingness before applying f column-by-column.
                    X, Y = arg1.align(arg2, join='outer')
                    X = X + 0 * Y
                    Y = Y + 0 * X
                    res_columns = arg1.columns.union(arg2.columns)
                    for col in res_columns:
                        if col in X and col in Y:
                            results[col] = f(X[col], Y[col])
                    return DataFrame(results, index=X.index, columns=res_columns)
            elif pairwise is True:
                results = defaultdict(dict)
                for i, k1 in enumerate(arg1.columns):
                    for j, k2 in enumerate(arg2.columns):
                        if j < i and arg2 is arg1:
                            # Symmetric case
                            results[i][j] = results[j][i]
                        else:
                            results[i][j] = f(*_prep_binary(arg1.iloc[:, i],
                                                            arg2.iloc[:, j]))
                # Panel of column-pair results; restore the column labels
                # on both axes.
                p = Panel.from_dict(results).swapaxes('items', 'major')
                if len(p.major_axis) > 0:
                    p.major_axis = arg1.columns[p.major_axis]
                if len(p.minor_axis) > 0:
                    p.minor_axis = arg2.columns[p.minor_axis]
                return p
            else:
                raise ValueError("'pairwise' is not True/False")
        else:
            # DataFrame vs 1-d operand: apply f to every column.
            results = {}
            for i, col in enumerate(arg1.columns):
                results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2))
            return dataframe_from_int_dict(results, arg1)
    else:
        # arg1 is 1-d, arg2 is a DataFrame: swap and recurse.
        return _flex_binary_moment(arg2, arg1, f)
def test_dense_to_sparse(self):
    # Converting a dense Panel to sparse yields SparseSeries columns.
    dense_panel = Panel.from_dict(self.data_dict)
    sparse_panel = dense_panel.to_sparse()
    self.assert_(isinstance(sparse_panel['ItemA']['A'], SparseSeries))
def test_dense_to_sparse(self):
    # Converting a dense Panel to sparse warns (deprecated) and yields
    # SparseSeries columns.
    dense_panel = Panel.from_dict(self.data_dict)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        sparse_panel = dense_panel.to_sparse()
    tm.assertIsInstance(sparse_panel['ItemA']['A'], SparseSeries)
class PlotCDF(Analyzer): def analyze(self, result_panel): print 'CDF will be plotted' if __name__ == '__main__': #DATAFRAME # df_plotter=DataFramePlotter() # dii={'sample':{1:2,2:3,4:67,5:70},'orig':{1:20,2:30,4:60,5:80}} # df_plotter.plot_data(DataFrame.from_dict(dii), pdf=True) #PANEL # pnl_plotter=PanelPlotter() dp={'minor1':{'item1':{1:2,2:3,4:67,5:70},'item2':{1:20,2:30,4:60,5:80}}, 'minor2':{'item1':{1:5,2:7,4:47,5:10},'item2':{1:2,2:3,4:6,5:8}}, 'minor3':{'item1':{1:32,2:31,4:7,5:30},'item2':{1:30,2:20,4:50,5:80}}} pnl=Panel.from_dict(dp, orient='minor') # print pnl # for item in pnl.items: # print pnl[item] # DataFramePlotter().plot_data(pnl[item], cumulative=True, title=item) #PANEL4D dp4={'label1':pnl, 'label2':pnl} pnl4D=Panel4D(dp4) for label in pnl4D.labels: #print label pnl=pnl4D.ix[label] for item in pnl.items: #print item #print pnl4D.ix[label,item]
def test_to_dense(self):
    # Densifying the sparse fixture must reproduce a Panel built directly
    # from the original data dict.
    densified = self.panel.to_dense()
    expected = Panel.from_dict(self.data_dict)
    assert_panel_equal(densified, expected)
def test_dense_to_sparse(self):
    # Converting a dense Panel to sparse yields SparseSeries columns.
    dense_panel = Panel.from_dict(self.data_dict)
    sparse_panel = dense_panel.to_sparse()
    tm.assertIsInstance(sparse_panel['ItemA']['A'], SparseSeries)
def _flex_binary_moment(arg1, arg2, f, pairwise=False):
    """Dispatch a binary moment function ``f`` over ndarray/Series/DataFrame
    operands, returning a Series, DataFrame, or (pairwise) Panel.

    :param f: binary callable applied to aligned 1-d operands.
    :param pairwise: with two DataFrames, True computes every column pair
                     into a Panel; False matches columns by label.
    :raises TypeError: when either argument is not ndarray/Series/DataFrame.
    :raises ValueError: on non-unique columns or bad ``pairwise`` value.
    """
    from pandas import Series, DataFrame, Panel
    if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and
            isinstance(arg2, (np.ndarray, Series, DataFrame))):
        raise TypeError("arguments to moment function must be of type "
                        "np.ndarray/Series/DataFrame")
    if (isinstance(arg1, (np.ndarray, Series)) and
            isinstance(arg2, (np.ndarray, Series))):
        # Simple case: two 1-d operands -> apply f directly after alignment.
        X, Y = _prep_binary(arg1, arg2)
        return f(X, Y)
    elif isinstance(arg1, DataFrame):
        def dataframe_from_int_dict(data, frame_template):
            # Results are keyed by integer position; map them back to the
            # template's column labels.
            result = DataFrame(data, index=frame_template.index)
            if len(result.columns) > 0:
                result.columns = frame_template.columns[result.columns]
            return result

        results = {}
        if isinstance(arg2, DataFrame):
            if pairwise is False:
                if arg1 is arg2:
                    # special case in order to handle duplicate column names
                    for i, col in enumerate(arg1.columns):
                        results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                    return dataframe_from_int_dict(results, arg1)
                else:
                    if not arg1.columns.is_unique:
                        raise ValueError("'arg1' columns are not unique")
                    if not arg2.columns.is_unique:
                        raise ValueError("'arg2' columns are not unique")
                    # Outer-align, then cross-propagate NaNs so both sides
                    # share missingness before applying f column-by-column.
                    X, Y = arg1.align(arg2, join='outer')
                    X = X + 0 * Y
                    Y = Y + 0 * X
                    res_columns = arg1.columns.union(arg2.columns)
                    for col in res_columns:
                        if col in X and col in Y:
                            results[col] = f(X[col], Y[col])
                    return DataFrame(results, index=X.index, columns=res_columns)
            elif pairwise is True:
                results = defaultdict(dict)
                for i, k1 in enumerate(arg1.columns):
                    for j, k2 in enumerate(arg2.columns):
                        if j < i and arg2 is arg1:
                            # Symmetric case
                            results[i][j] = results[j][i]
                        else:
                            results[i][j] = f(
                                *_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]))
                # Panel of column-pair results; restore the column labels
                # on both axes.
                p = Panel.from_dict(results).swapaxes('items', 'major')
                if len(p.major_axis) > 0:
                    p.major_axis = arg1.columns[p.major_axis]
                if len(p.minor_axis) > 0:
                    p.minor_axis = arg2.columns[p.minor_axis]
                return p
            else:
                raise ValueError("'pairwise' is not True/False")
        else:
            # DataFrame vs 1-d operand: apply f to every column.
            results = {}
            for i, col in enumerate(arg1.columns):
                results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2))
            return dataframe_from_int_dict(results, arg1)
    else:
        # arg1 is 1-d, arg2 is a DataFrame: swap and recurse.
        return _flex_binary_moment(arg2, arg1, f)
def _getitem_tuple(self, keys):
    # Select each requested item through __getitem__ and repackage the
    # selection as a new Panel.
    return Panel.from_dict({key: self[key] for key in keys})
def test_to_dense(self):
    # Densifying the sparse fixture must reproduce a Panel built directly
    # from the original data dict.
    densified = self.panel.to_dense()
    expected = Panel.from_dict(self.data_dict)
    tm.assert_panel_equal(densified, expected)
def getQuotes(self, tickers, fields=Fields.QUOTES, index=None, **kwargs):
    '''
    @summary: retrieve google finance data asked while initializing
    and store it: Date, open, low, high, close, volume
    @param quotes: list of quotes to fetch
    @param fields: list of fields to store per quotes
    @param index: pandas.Index object, used for dataframes
    @param kwargs.start: date or datetime of the first values
           kwargs.end: date or datetime of the last value
           kwargs.delta: datetime.timedelta object, period of time to fill
           kwargs.save: save to database downloaded quotes
           kwargs.reverse: reverse companie name and field in panel
           kwargs.symbols
           kwargs.markets
    @return a panel/dataframe/timeserie like close = data['google']['close'][date]
    '''
    ''' ----------------------------------------------------------------------------'''
    ''' ---------------------------------- Index check and build -----------------'''
    #FIXME reversed dataframe could be store in database ?
    df = dict()
    save = kwargs.get('save', False)
    reverse = kwargs.get('reverse', False)
    markets = kwargs.get('markets', None)
    symbols = kwargs.get('symbols', None)
    # Build an index from start/end/delta kwargs when none was supplied.
    if not isinstance(index, pd.DatetimeIndex):
        index = self._makeIndex(kwargs)
        if not isinstance(index, pd.DatetimeIndex):
            return None
    # All downstream date handling assumes a timezone-aware index.
    if not index.tzinfo:
        index = index.tz_localize(self.tz)
    assert (index.tzinfo)
    # Symbol/market codes: database takes precedence over kwargs.
    if self.connected['database']:
        symbols, markets = self.db.getTickersCodes(tickers)
    elif not symbols or not markets:
        self._logger.error('** No database neither informations provided')
        return None
    for ticker in tickers:
        if not ticker in symbols:
            self._logger.warning(
                'No code availablefor {}, going on'.format(ticker))
            continue
        self._logger.info('Processing {} stock'.format(ticker))
        ''' ----------------------------------------------------------------------------'''
        ''' ---------------------------------------------- Database check ------------'''
        # _inspectDB returns already-stored quotes and shrinks the index to
        # the span still missing from the database.
        db_df, index = self._inspectDB(ticker, index, fields)
        assert (index.tzinfo)
        if not db_df.empty:
            assert (db_df.index.tzinfo)
        if index.size == 0:
            # Everything was already in the database; nothing to save.
            save = False
            df[ticker] = db_df
            continue
        ''' ----------------------------------------------------------------------------'''
        ''' ---------------------------------------------- Remote retrievers ---------'''
        self._logger.info('Downloading missing data, from {} to {}'.format(
            index[0], index[-1]))
        #FIXME No index.freq for comaprison?
        #if (index[1] - index[0]) < pd.datetools.timedelta(days=1):
        # Frequencies finer than one business day fetch minutely quotes.
        if index.freq > pd.datetools.BDay():
            self._logger.info('Fetching minutely quotes ({})'.format(
                index.freq))
            #TODO truncate in the method
            network_df = DataFrame(
                self.remote.getMinutelyQuotes(symbols[ticker],
                                              markets[ticker], index),
                columns=fields).truncate(after=index[-1])
        else:
            network_df = DataFrame(self.remote.getHistoricalQuotes(
                symbols[ticker], index), columns=fields)
        ''' ----------------------------------------------------------------------------'''
        ''' ---------------------------------------------- Merging -------------------'''
        # Concatenate database and freshly-downloaded data in date order.
        if not db_df.empty:
            self._logger.debug(
                'Checking db index ({}) vs network index ({})'.format(
                    db_df.index, network_df.index))
            if db_df.index[0] > network_df.index[0]:
                df[ticker] = pd.concat([network_df, db_df])
            else:
                df[ticker] = pd.concat([db_df, network_df]).sort_index()
        else:
            df[ticker] = network_df
    ''' ----------------------------------------------------------------------------'''
    ''' ---------------------------------------------- Manage final panel --------'''
    data = Panel.from_dict(df, intersect=True)
    if save:
        #TODO: accumulation and compression of data issue, drop always true at the moment
        if self.connected['database']:
            self.db.updateStockDb(data, Fields.QUOTES, drop=True)
        else:
            self._logger.warning('! No database connection for saving.')
    if reverse:
        return Panel.from_dict(df, intersect=True, orient='minor')
    #NOTE if data used here, insert every FIELD.QUOTES columns
    #NOTE Only return Panel when one ticker and/or one field ?
    return Panel.from_dict(df, intersect=True)
def read(
    self,
    metrics,
    start=None,
    end=None,
    create_multiindex=True,
    remove_duplicate_indices=True,
):
    """
    read the data from Graphite

    Arguments:
        metrics (str, list or dict): the metrics you want to look up.
            A string fetches one DataFrame, a list concatenates several
            metrics into one DataFrame, a dict (experimental) builds a
            Panel keyed by the dict's labels.
        start (string): the starting date timestamp. All Graphite
            datestrings are allowed
        end (string): the ending date timestamp, same as start date
        create_multiindex (bool): split metric paths into a MultiIndex
            on the resulting columns
        remove_duplicate_indices (bool): forwarded to the MultiIndex
            creation

    returns:
        a pandas DataFrame or Panel with the requested Data from Graphite

    raises:
        GraphiteDataError: when no URL is configured
        TypeError: when metrics is not a str, list or dict
    """
    # sanity checks
    if not self.url:
        raise GraphiteDataError('No URL specified')
    else:
        url = urlparse.urljoin(self.url, self._render_api)
    # Fall back to the instance-level default time range.
    if start is None:
        start = self._from
    if end is None:
        end = self._until
    if isinstance(metrics, string_types):
        df = self._download_single_metric(url, metrics, start, end)
        if create_multiindex:
            self._create_multiindex(df, remove_duplicate_indices)
    elif isinstance(metrics, list):
        dfs = []
        for metric in metrics:
            dfs.append(
                self._download_single_metric(url, metric, start, end))
        df = concat(dfs, axis=1)
        if create_multiindex:
            self._create_multiindex(df, remove_duplicate_indices)
    elif isinstance(metrics, dict):
        warnings.warn(
            'To create a Panel from a dict of metric is a '
            'experimental feature. So don\'t use this in '
            'production! Because the resulting object may be '
            'changed in the future or the feature may be removed.')
        dfs = {}
        for label, metric in metrics.items():
            dfs[label] = self._download_single_metric(
                url, metric, start, end)
            if create_multiindex:
                self._create_multiindex(dfs[label],
                                        remove_duplicate_indices)
        df = Panel.from_dict(dfs)
    else:
        # BUGFIX: the message claimed only str/list were accepted and used
        # the wrong parameter name, although dict is handled above.
        raise TypeError('metrics has to be of type str, list or dict')
    return df