def remove(self, path):
    s = HDFStore(self.path)
    if path in s:
        print("removing %s" % path)
        s.remove(path)
        s.flush(fsync=True)
    s.close()
def _put(self, path, obj):
    s = HDFStore(self.path)
    if path in s:
        print("updating %s" % path)
        s.remove(path)
    s.close()
    s = HDFStore(self.path)
    s[path] = obj
    s.flush(fsync=True)
    s.close()
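The remove/_put pair above assumes an object whose `path` attribute points at the HDF5 file. Below is a minimal standalone sketch of the same overwrite-then-fsync pattern; the `put_overwrite` helper, the `cache.h5` file and the `prices` key are illustrative, not from the original source.

import pandas as pd
from pandas import HDFStore


def put_overwrite(store_path, key, obj):
    # Same idea as _put() above: drop any existing node, rewrite it,
    # then flush with fsync so the file on disk is consistent.
    with HDFStore(store_path) as s:
        if key in s:
            s.remove(key)
        s[key] = obj
        s.flush(fsync=True)


put_overwrite('cache.h5', 'prices', pd.DataFrame({'close': [1.0, 2.0, 3.0]}))
print(pd.read_hdf('cache.h5', 'prices'))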
def to_frame_hdf(self, store_path, store_key, df_cb=None, max_msg=None,
                 usecols=None, chunk_cnt=CHUNK_CNT, show_prog=True):
    """Convert to a Pandas DataFrame and write it to an HDF store in one shot."""
    store = HDFStore(store_path, 'w')
    df = self._to_frame(usecols, chunk_cnt, show_prog)
    # PyTables does not support unicode strings, so encode the message column.
    df['msg'] = df['msg'].apply(lambda m: m.encode('utf8'))
    if df_cb is not None:
        df_cb(df)
    min_itemsize = {'kind': 20, 'msg': 255}
    if max_msg is not None:
        min_itemsize['msg'] = max_msg
    store.put(store_key, df, format='table', min_itemsize=min_itemsize)
    store.flush()
    store.close()
def to_frame_hdf(self, store_path, store_key, df_cb=None, max_msg=None,
                 usecols=None, chunk_cnt=CHUNK_CNT):
    """Convert to a Pandas DataFrame chunk by chunk and append each chunk to an HDF store."""
    store = HDFStore(store_path, 'w')
    _c = self._to_frame_prop('to_frame_hdf', False)
    for df in self._to_frame_gen(_c, usecols, chunk_cnt):
        min_itemsize = {'kind': 20, 'msg': 255}
        # PyTables does not support unicode for now
        df['msg'] = df['msg'].apply(lambda m: m.encode('utf8'))
        if df_cb is not None:
            df_cb(df)
        if max_msg is not None:
            min_itemsize['msg'] = max_msg
        store.append(store_key, df, format='table', min_itemsize=min_itemsize)
    store.flush()
    store.close()
    _c.pg.done()
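Both variants write with `format='table'`, so the stored key can be read back either whole or streamed in chunks. Here is a short sketch of the reading side, assuming a hypothetical store path `log.h5` and key `logs` that are not part of the original code.

import pandas as pd
from pandas import HDFStore

# Read the whole table back at once...
df = pd.read_hdf('log.h5', 'logs')

# ...or iterate over it in chunks, which is what the table format and
# store.append() above make possible for data that won't fit in memory.
with HDFStore('log.h5') as store:
    for chunk in store.select('logs', chunksize=100000):
        print(len(chunk))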
from threading import Thread

import psycopg2
from psycopg2.extras import DictCursor
from pandas import DataFrame, HDFStore


def cursor(step):
    conn = psycopg2.connect(database='points_{}'.format(step))
    return conn.cursor(cursor_factory=DictCursor)


if __name__ == '__main__':
    pre_cursor = cursor('pre')
    post_cursor = cursor('post')
    sql = 'SELECT x, y, z, value FROM points'

    # Get data in two threads to speed things up
    pre_t = Thread(target=pre_cursor.execute, args=(sql,))
    pre_t.start()
    post_t = Thread(target=post_cursor.execute, args=(sql,))
    post_t.start()
    pre_t.join()
    post_t.join()

    # Create data frames
    pre = DataFrame.from_records([dict(row) for row in pre_cursor])
    post = DataFrame.from_records([dict(row) for row in post_cursor])

    # Store data frames in the HDF5 data store
    store_file = 'points.h5'
    store = HDFStore(store_file)
    store['pre'] = pre
    store['post'] = post
    store.flush()
    store.close()
    print('Data stored at {}'.format(store_file))
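Reading the two frames back is symmetric. A small sketch, assuming the `points.h5` file written above and that both queries return rows in the same order:

from pandas import HDFStore

with HDFStore('points.h5') as store:
    pre = store['pre']
    post = store['post']

# e.g. how did the value column change between the two steps?
delta = post['value'] - pre['value']
print(delta.describe())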