def save_quotes(self, table, data, metadata=None, reset=False):
    """Insert every row of ``data`` into the RethinkDB table ``table``.

    Each row is converted to a dict, its index value is stored under the
    'date' key, and the ``metadata`` fields are merged on top (so a
    'date' key in ``metadata`` overrides the row index).

    :param table: table identifier, normalised with ``datautils.clean_sid``
    :param data: object exposing ``iterrows()`` and ``len()``
        (presumably a pandas DataFrame indexed by date - confirm with callers)
    :param metadata: optional dict of extra fields added to every record
    :param reset: when true, ``self._reset_data`` is called on the table
        before inserting
    :raises AssertionError: when RethinkDB reports errors for an insert
    """
    table = datautils.clean_sid(table)
    if reset:
        self._reset_data(table)

    # A fresh dict per call instead of a shared mutable default argument.
    extra_fields = {} if metadata is None else metadata

    rows = progress.bar(data.iterrows(), expected_size=len(data))
    for date, row in rows:
        record = row.to_dict()
        record['date'] = date
        record.update(extra_fields)

        report = rdb.table(table).insert(record).run(self.session)
        # Explicit check rather than a bare `assert`: asserts are stripped
        # under `python -O`, which would let failed writes pass silently.
        # AssertionError is kept so existing callers see the same type.
        if report['errors']:
            raise AssertionError(
                'failed to insert record into {}: {}'.format(
                    table, report.get('first_error')))
def _load_quotes(self, sids, start, end, select):
    """Load the rows dated within [start, end] for each table in ``sids``.

    Tables for which ``self.available`` is false are skipped with a
    warning.

    :param sids: iterable of table identifiers
    :param start: lower bound handed to the ReQL ``during`` filter
    :param end: upper bound handed to the ReQL ``during`` filter
    :param select: list of fields to fetch; an empty list fetches every
        field. The list is not modified.
    :return: with exactly one selected field, a ``pd.DataFrame`` built from
        ``{table: {date: value}}`` and forward-filled; otherwise a
        ``pd.Panel`` built from ``{table: {date: row}}`` (tables without
        rows dropped) and transposed with axes ``(0, 2, 1)``.
    """
    # Decided on the fields the caller asked for, before 'date' is added.
    is_panel = len(select) != 1

    # Build the projection once, on a copy: appending to `select` inside
    # the loop mutated the caller's list and added one more 'date' entry
    # for every table.
    fields = list(select) + ['date'] if select else None

    data = {}
    for table in sids:
        if not self.available(table):
            log.warning('{} not found in database, skipping'.format(table))
            continue

        query = rdb.table(datautils.clean_sid(table)).filter(
            lambda row: row['date'].during(start, end))
        if fields:
            query = query.pluck(fields)
        cursor_data = query.run(self.session)

        data[table] = {}
        for row in cursor_data:
            # Remove rethinkdb automatic id
            row.pop('id', None)
            # tzinfo of the object is rethinkdb specific
            date = row.pop('date').astimezone(pytz.utc)
            data[table][date] = row if is_panel else row[select[0]]

    if is_panel:
        # FIXME Missing data
        # `items()` works on both Python 2 and 3, unlike `iteritems()`.
        data = {k: v for k, v in data.items() if v}
        # NOTE: pd.Panel was removed in pandas 0.25, so this branch
        # requires an older pandas.
        data = pd.Panel(data).transpose(0, 2, 1)
    else:
        data = pd.DataFrame(data).fillna(method='pad')
    return data
def decorator(self, symbols, **kwargs):
    """Call ``fct`` with Quandl codes and split its result per symbol.

    ``fct`` is a free variable here (presumably the function wrapped by
    the enclosing decorator - confirm against the outer scope).

    :param symbols: a list of symbols, or a single symbol which is wrapped
        into a one-element list
    :param kwargs: forwarded unchanged to ``fct``
    :return: dict mapping each symbol to the columns of the ``fct`` result
        whose name matches the upper-cased, cleaned symbol; column names
        have spaces replaced by underscores, are lower-cased, and keep only
        the part after the last '_-_'
    """
    if not isinstance(symbols, list):
        symbols = [symbols]

    # Real lists instead of map(): on Python 3 map() returns a one-shot
    # iterator, which is neither a reusable argument for `fct` nor a valid
    # value for a DataFrame's `columns`.
    quandl_symbols = [_build_quandl_code(symbol) for symbol in symbols]
    raw_data = fct(self, quandl_symbols, **kwargs)

    data = {}
    for sid in symbols:
        pattern = '.*{}.*'.format(utils.clean_sid(sid).upper())
        frame = raw_data.filter(regex=pattern)
        frame.columns = [
            name.replace(' ', '_').lower().split('_-_')[-1]
            for name in frame.columns
        ]
        data[sid] = frame
    return data
def _load_quotes(self, sids, start, end, select):
    """Load the rows dated within [start, end] for each table in ``sids``.

    Tables for which ``self.available`` is false are skipped with a
    warning.

    :param sids: iterable of table identifiers
    :param start: lower bound handed to the ReQL ``during`` filter
    :param end: upper bound handed to the ReQL ``during`` filter
    :param select: list of fields to fetch; an empty list fetches every
        field. The list is not modified.
    :return: with exactly one selected field, a ``pd.DataFrame`` built from
        ``{table: {date: value}}`` and forward-filled; otherwise a
        ``pd.Panel`` built from ``{table: {date: row}}`` (tables without
        rows dropped) and transposed with axes ``(0, 2, 1)``.
    """
    # Decided on the fields the caller asked for, before 'date' is added.
    is_panel = len(select) != 1

    # Build the projection once, on a copy: appending to `select` inside
    # the loop mutated the caller's list and added one more 'date' entry
    # for every table.
    fields = list(select) + ['date'] if select else None

    data = {}
    for table in sids:
        if not self.available(table):
            log.warning('{} not found in database, skipping'.format(table))
            continue

        query = rdb.table(datautils.clean_sid(table)).filter(
            lambda row: row['date'].during(start, end))
        if fields:
            query = query.pluck(fields)
        cursor_data = query.run(self.session)

        data[table] = {}
        for row in cursor_data:
            # Remove rethinkdb automatic id
            row.pop('id', None)
            # tzinfo of the object is rethinkdb specific
            date = row.pop('date').astimezone(pytz.utc)
            data[table][date] = row if is_panel else row[select[0]]

    if is_panel:
        # FIXME Missing data
        # `items()` works on both Python 2 and 3, unlike `iteritems()`.
        data = {k: v for k, v in data.items() if v}
        # NOTE: pd.Panel was removed in pandas 0.25, so this branch
        # requires an older pandas.
        data = pd.Panel(data).transpose(0, 2, 1)
    else:
        data = pd.DataFrame(data).fillna(method='pad')
    return data
def last_chrono_entry(self, table):
    """Return the entry of ``table`` with the greatest 'date' value.

    The rows are ordered by descending 'date', limited to one, and reduced
    to their 'date' field; the first element of the result is returned.
    Indexing the result fails when the query yields no rows.
    """
    newest_first = rdb.table(datautils.clean_sid(table)).order_by(
        rdb.desc('date'))
    latest_date_only = newest_first.limit(1).pluck(['date'])
    rows = latest_date_only.run(self.session)
    return rows[0]
def available(self, table):
    """Tell whether the cleaned name of ``table`` is a database table.

    The name is normalised with ``datautils.clean_sid`` and looked up in
    the list returned by ``rdb.table_list()`` for the current session.
    """
    # TODO Check with dates
    table_name = datautils.clean_sid(table)
    existing_tables = rdb.table_list().run(self.session)
    return table_name in existing_tables