def _load(self):
        tmp_path = temp_file.make_temporary_file()
        with temp_file.deleting(tmp_path):
            print('loading from s3')
            load_file_from_s3(self.boto_bucket, self.schema.name, tmp_path)

            print('loading from hdf')
            store = HdfDataStore(self.schema, tmp_path)
            return store._load()
    def _store(self, df):
        tmp_path = temp_file.make_temporary_file()
        with temp_file.deleting(tmp_path):
            print('storing to temp hdf')
            store = HdfDataStore(self.schema, tmp_path)
            store._store(df)

            print('saving to s3')
            store_file_to_s3(self.boto_bucket, self.schema.name, tmp_path)
예제 #3
0
    def _load(self):
        tmp_path = temp_file.make_temporary_file()
        with temp_file.deleting(tmp_path):
            print('loading from s3')
            load_file_from_s3(self.boto_bucket, self.schema.name, tmp_path)

            print('loading from hdf')
            store = HdfDataStore(self.schema, tmp_path)
            return store._load()
예제 #4
0
    def _store(self, df):
        tmp_path = temp_file.make_temporary_file()
        with temp_file.deleting(tmp_path):
            print('storing to temp hdf')
            store = HdfDataStore(self.schema, tmp_path)
            store._store(df)

            print('saving to s3')
            store_file_to_s3(self.boto_bucket, self.schema.name, tmp_path)
예제 #5
0
    def _store_chunks(self, chunks):
        for i, chunk in enumerate(chunks):
            k = self._chunk_key(i)

            tmp_path = temp_file.make_temporary_file()
            with temp_file.deleting(tmp_path):
                print('storing chunk to temp hdf')
                store = HdfDataStore(self.schema, tmp_path)
                store._store(chunk)

                print('saving chunk to s3')
                store_file_to_s3(self.boto_bucket, k, tmp_path)
예제 #6
0
    def _load_chunks(self):
        for i in count():
            
            k = self._chunk_key(i)
            if not key_exists(self.boto_bucket, k):
                break

            tmp_path = temp_file.make_temporary_file()
            with temp_file.deleting(tmp_path):
                print('loading from s3')
                load_file_from_s3(self.boto_bucket, k, tmp_path)

                print('loading from hdf')
                store = HdfDataStore(self.schema, tmp_path)
                chunk = store._load()
                yield chunk