def test_writing_twice(self):
    with s3io.open(self.S3_URL, mode='w', **CREDENTIALS) as s3_file:
        s3_file.write(b'Some other data.')
    self.assertEqual(get_contents(self.TEST_KEY), b'Some other data.')
    with s3io.open(self.S3_URL, mode='w', **CREDENTIALS) as s3_file:
        s3_file.write(self.TEST_CONTENTS)
    self.assertEqual(get_contents(self.TEST_KEY), self.TEST_CONTENTS)
def test_writing_via_predefined_connection(self):
    s3 = boto.connect_s3(**CREDENTIALS)
    with s3io.open(self.S3_URL, mode='w', s3_connection=s3) as s3_file:
        s3_file.write(self.TEST_CONTENTS)
    self.assertEqual(get_contents(self.TEST_KEY), self.TEST_CONTENTS)
def fit_model_and_forecast(id_list, config):
    # Cast the collection of distinct time series IDs into a Python list
    id_list = list(id_list)

    # Open an s3fs handle (used by fastparquet) and a boto S3 connection (used by s3io)
    s3 = s3fs.S3FileSystem()
    s3_open1 = s3.open
    s3_open2 = boto.connect_s3(host=config['s3_host'])

    # Loop over time series IDs
    for i, id in enumerate(id_list):
        # Determine the S3 file path and load the data into a pandas dataframe
        file_path = s3.glob(config['path_training_data_parquet'] +
                            'ID=' + str(id) + '/*.parquet')
        df_data = ParquetFile(file_path, open_with=s3_open1).to_pandas()

        # Sort the time series data according to the original ordering
        df_data = df_data.sort_values('ORDER')

        # Initialize a dataframe to store the forecasts
        df_forecasts = pd.DataFrame(np.nan, index=range(0, config['len_eval']),
                                    columns=['FORECAST'])

        # Add columns with ID, true data and ordering information
        df_forecasts.insert(0, 'ID', id, allow_duplicates=True)
        df_forecasts.insert(1, 'ORDER', np.arange(1, config['len_eval'] + 1))
        df_forecasts.insert(2, 'DATA',
                            df_data['DATA'][range((config['len_series'] - config['len_eval']),
                                                  config['len_series'])].values,
                            allow_duplicates=True)

        # Loop over successive estimation windows
        for j, train_end in enumerate(range((config['len_series'] - config['len_eval'] - 1),
                                            (config['len_series'] - 1))):
            # Fit an ARMA(2,2) model and forecast one step ahead
            model = ARMA(df_data['DATA'][range(0, train_end + 1)], (2, 2)).fit(disp=False)
            df_forecasts.at[j, 'FORECAST'] = model.predict(train_end + 1, train_end + 1)

        # Write the dataframe with the forecasts to S3 in Parquet format
        path = config['path_forecasts'] + 'ID=' + str(id) + '.parquet'
        write(path, df_forecasts, write_index=False, append=False, open_with=s3_open1)

        # Save the last fitted ARMA model to S3 as a pickle file
        path = config['path_models'] + 'ID=' + str(id) + '.model'
        with s3io.open(path, mode='w', s3_connection=s3_open2) as s3_file:
            joblib.dump(model, s3_file)
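# Hedged usage sketch (not part of the original code): fit_model_and_forecast expects an
# iterable of series IDs and a config dict. The keys below are the ones the function
# actually reads; every concrete value (host, bucket prefixes, series lengths) is a
# made-up placeholder, not taken from the original.
example_config = {
    's3_host': 's3.amazonaws.com',                        # assumed S3 endpoint for boto
    'path_training_data_parquet': 'my-bucket/training/',  # hypothetical input prefix
    'path_forecasts': 'my-bucket/forecasts/',             # hypothetical output prefix
    'path_models': 'my-bucket/models/',                   # hypothetical output prefix
    'len_series': 120,                                    # total observations per series
    'len_eval': 12,                                       # observations held out for evaluation
}
# fit_model_and_forecast([1, 2, 3], example_config)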
def load_models(cls, models=None, model_url=None, model_mode=None):
    start = time.time()
    if cls.MODELS is not None:
        return
    if models:
        log.info("Loading supplied models...")
        cls.MODELS = models
    else:
        if model_mode == "s3":
            with s3io.open(model_url, mode='rb', **credentials) as f:
                loaded_model = joblib.load(f)
        else:
            with open(model_url, mode='rb') as f:
                loaded_model = joblib.load(f)
        if loaded_model is None:
            raise Exception("Model does not exist")
        cls.MODELS = loaded_model
    end = time.time()
    log.info("Loaded the models in %d seconds" % (end - start))
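# Hedged usage sketch (not part of the original): load_models caches its result on
# cls.MODELS, so it is presumably a classmethod of a model-serving class. The class
# name and paths below are assumptions for illustration only.
# ModelService.load_models(model_url='s3://my-bucket/models/latest.pkl', model_mode='s3')
# ModelService.load_models(model_url='/opt/models/latest.pkl')  # local-file branch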
def test_reading_via_predefined_connection(self):
    s3 = boto.connect_s3(**CREDENTIALS)
    with s3io.open(self.S3_URL, s3_connection=s3) as s3_file:
        contents = s3_file.read()
    self.assertEqual(contents, self.TEST_CONTENTS)
def test_writing_via_credentials(self):
    with s3io.open(self.S3_URL, mode='w', **CREDENTIALS) as s3_file:
        s3_file.write(self.TEST_CONTENTS)
    self.assertEqual(get_contents(self.TEST_KEY), self.TEST_CONTENTS)
def open_invalid_url():
    INVALID_S3_URL = 's3://something'
    with s3io.open(INVALID_S3_URL, **CREDENTIALS):
        pass
"""Example of usage of Joblib with Amazon S3.""" import s3io import joblib import numpy as np big_obj = [np.ones((500, 500)), np.random.random((1000, 1000))] # Customize the following values with yours bucket = "my-bucket" key = "my_pickle.pkl" compress = ('gzip', 3) credentials = dict( aws_access_key_id="<Public Key>", aws_secret_access_key="Private Key", ) # Dump in an S3 file is easy with Joblib with s3io.open('s3://{0}/{1}'.format(bucket, key), mode='w', **credentials) as s3_file: joblib.dump(big_obj, s3_file, compress=compress) with s3io.open('s3://{0}/{1}'.format(bucket, key), mode='r', **credentials) as s3_file: obj_reloaded = joblib.load(s3_file) print("Correctly reloaded? {0}".format( all(np.allclose(x, y) for x, y in zip(big_obj, obj_reloaded))))
def read_not_existent_bucket():
    S3_URL = 's3://{0}/{1}'.format(
        'not_existent_bucket_hjshewighksfdkjffh', 'not_existent_key')
    with s3io.open(S3_URL, **CREDENTIALS):
        pass
def test_reading_via_credentials(self):
    with s3io.open(self.S3_URL, **CREDENTIALS) as s3_file:
        contents = s3_file.read()
    self.assertEqual(contents, self.TEST_CONTENTS)
def read_not_existent_key():
    S3_URL = 's3://{0}/{1}'.format(BUCKET, 'not_existent_key')
    with s3io.open(S3_URL, **CREDENTIALS):
        pass
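# Hedged usage sketch (not part of the original tests): open_invalid_url,
# read_not_existent_bucket and read_not_existent_key take no arguments, which suggests
# they are meant to be passed to assertRaises inside the test class. The exception types
# below are assumptions; s3io/boto may raise more specific errors.
# self.assertRaises(ValueError, open_invalid_url)          # malformed S3 URL
# self.assertRaises(Exception, read_not_existent_bucket)   # missing bucket
# self.assertRaises(Exception, read_not_existent_key)      # missing key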