# Assumes module-level imports elsewhere in this file, e.g.:
#   from itertools import count
# plus the project-local helpers used below (temp_file, HdfDataStore,
# load_file_from_s3, store_file_to_s3, key_exists).

def _load(self):
    # Download the stored HDF file from S3 to a temporary path, then
    # delegate deserialization to an HdfDataStore over that file.
    tmp_path = temp_file.make_temporary_file()
    with temp_file.deleting(tmp_path):
        print('loading from s3')
        load_file_from_s3(self.boto_bucket, self.schema.name, tmp_path)
        print('loading from hdf')
        store = HdfDataStore(self.schema, tmp_path)
        return store._load()
def _store(self, df):
    # Serialize the frame to a temporary HDF file, then upload it to S3
    # under the schema's name.
    tmp_path = temp_file.make_temporary_file()
    with temp_file.deleting(tmp_path):
        print('storing to temp hdf')
        store = HdfDataStore(self.schema, tmp_path)
        store._store(df)
        print('saving to s3')
        store_file_to_s3(self.boto_bucket, self.schema.name, tmp_path)
def _store_chunks(self, chunks):
    # Write each chunk to its own temporary HDF file and upload it
    # under a per-chunk S3 key.
    for i, chunk in enumerate(chunks):
        k = self._chunk_key(i)
        tmp_path = temp_file.make_temporary_file()
        with temp_file.deleting(tmp_path):
            print('storing chunk to temp hdf')
            store = HdfDataStore(self.schema, tmp_path)
            store._store(chunk)
            print('saving chunk to s3')
            store_file_to_s3(self.boto_bucket, k, tmp_path)
def _load_chunks(self):
    # Stream chunks back lazily: probe consecutive chunk keys until one
    # is missing, downloading and deserializing each hit as we go.
    for i in count():
        k = self._chunk_key(i)
        if not key_exists(self.boto_bucket, k):
            break
        tmp_path = temp_file.make_temporary_file()
        with temp_file.deleting(tmp_path):
            print('loading from s3')
            load_file_from_s3(self.boto_bucket, k, tmp_path)
            print('loading from hdf')
            store = HdfDataStore(self.schema, tmp_path)
            chunk = store._load()
        # Yield outside the context manager so the temp file is deleted
        # before the consumer resumes; the chunk is already in memory.
        yield chunk
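
# The chunk methods above assume a _chunk_key helper that maps a chunk
# index to a deterministic S3 key, so that _load_chunks can probe
# consecutive indices until key_exists fails. The real implementation
# lives elsewhere in this class; a minimal sketch of the assumed
# contract (the key format here is hypothetical) might look like:
#
#     def _chunk_key(self, i):
#         # Namespace chunk keys under the schema name so consecutive
#         # indices enumerate this dataset's chunks and nothing else.
#         return '{}.chunk.{}'.format(self.schema.name, i)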