Example No. 1
    def test_writing_twice(self):
        with s3io.open(self.S3_URL, mode='w', **CREDENTIALS) as s3_file:
            s3_file.write(b'Some other data.')

        self.assertEqual(get_contents(self.TEST_KEY), b'Some other data.')

        with s3io.open(self.S3_URL, mode='w', **CREDENTIALS) as s3_file:
            s3_file.write(self.TEST_CONTENTS)

        self.assertEqual(get_contents(self.TEST_KEY), self.TEST_CONTENTS)
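The unittest methods in these examples all rely on shared, module-level test scaffolding that the snippets do not show. A minimal sketch of what that scaffolding might look like is given below; the bucket name, key name, test contents, and the get_contents helper are illustrative assumptions, not code from the original test suite.

import unittest

import boto
import s3io

# Hypothetical fixture values -- substitute your own bucket and AWS keys.
CREDENTIALS = dict(
    aws_access_key_id='<Public Key>',
    aws_secret_access_key='<Private Key>',
)
BUCKET = 'my-test-bucket'


def get_contents(key_name):
    # Read the key's raw bytes with plain boto, independently of s3io,
    # so the tests can verify what s3io actually wrote.
    conn = boto.connect_s3(**CREDENTIALS)
    bucket = conn.get_bucket(BUCKET)
    return bucket.get_key(key_name).get_contents_as_string()


class S3IOTestCase(unittest.TestCase):
    TEST_KEY = 'test_key'
    TEST_CONTENTS = b'Test contents.'
    S3_URL = 's3://{0}/{1}'.format(BUCKET, TEST_KEY)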
Example No. 2
    def test_writing_via_predefined_connection(self):
        s3 = boto.connect_s3(**CREDENTIALS)

        with s3io.open(self.S3_URL, mode='w', s3_connection=s3) as s3_file:
            s3_file.write(self.TEST_CONTENTS)

        self.assertEqual(get_contents(self.TEST_KEY), self.TEST_CONTENTS)
Example No. 3
import numpy as np
import pandas as pd
import boto
import joblib
import s3fs
import s3io
from fastparquet import ParquetFile, write
from statsmodels.tsa.arima_model import ARMA


def fit_model_and_forecast(id_list, config):

    # Cast collection of distinct time series IDs into Python list
    id_list = list(id_list)

    # Set up S3 access: an s3fs file opener for Parquet I/O and a boto connection for s3io
    s3 = s3fs.S3FileSystem()
    s3_open = s3.open
    s3_conn = boto.connect_s3(host=config['s3_host'])

    # Loop over time series IDs
    for i, id in enumerate(id_list):

        # Determine S3 file path and load data into pandas dataframe
        file_path = s3.glob(config['path_training_data_parquet'] + 'ID=' + str(id) +
                            '/*.parquet')
        df_data = ParquetFile(file_path, open_with=s3_open).to_pandas()

        # Sort time series data according to original ordering and reset the index,
        # so the label-based indexing below selects the sorted rows
        df_data = df_data.sort_values('ORDER').reset_index(drop=True)

        # Initialize dataframe to store forecast
        df_forecasts = pd.DataFrame(np.nan, index=range(0, config['len_eval']),
                                    columns=['FORECAST'])

        # Add columns with ID, true data and ordering information
        df_forecasts.insert(0, 'ID', id, allow_duplicates=True)
        df_forecasts.insert(1, 'ORDER', np.arange(1, config['len_eval'] + 1))
        df_forecasts.insert(2, 'DATA',
                            df_data['DATA'][range((config['len_series'] - config['len_eval']),
                                                  config['len_series'])].values,
                            allow_duplicates=True)

        # Loop over successive estimation windows
        for j, train_end in enumerate(range((config['len_series'] - config['len_eval'] - 1),
                                            (config['len_series'] - 1))):

            # Fit ARMA(2,2) model and forecast one-step ahead
            model = ARMA(df_data['DATA'][range(0, train_end+1)], (2, 2)).fit(disp=False)
            df_forecasts.at[j, 'FORECAST'] = model.predict(train_end+1, train_end+1)

        # Write dataframe with forecast to S3 in Parquet file format
        path = config['path_forecasts'] + 'ID=' + str(id) + '.parquet'
        write(path, df_forecasts, write_index=False, append=False, open_with=s3_open)

        # Save fitted ARMA model to S3 in pickle file format
        path = config['path_models'] + 'ID=' + str(id) + '.model'
        with s3io.open(path, mode='w', s3_connection=s3_conn) as s3_file:
            joblib.dump(model, s3_file)
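A hedged sketch of how the function above might be invoked. The configuration keys are the ones the function reads; the host, paths, and window lengths are placeholder values, and the ID list would normally come from the distinct series IDs in the training data.

# Hypothetical configuration; all values below are placeholders.
config = {
    's3_host': 's3.amazonaws.com',
    'path_training_data_parquet': 's3://my-bucket/training_data/',
    'path_forecasts': 's3://my-bucket/forecasts/',
    'path_models': 's3://my-bucket/models/',
    'len_series': 120,  # total observations per series
    'len_eval': 12,     # trailing observations forecast one step ahead
}

fit_model_and_forecast([1, 2, 3], config)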
Example No. 4
    def load_models(cls, models=None, model_url=None, model_mode=None):
        start = time.time()
        if cls.MODELS is not None:
            return

        if models:
            log.info("Loading supplied models...")
            cls.MODELS = models
        else:
            if model_mode == "s3":
                with s3io.open(model_url, mode='rb', **credentials) as f:
                    loaded_model = joblib.load(f)
            else:
                with open(model_url, mode='rb') as f:
                    loaded_model = joblib.load(f)

            if loaded_model is None:
                raise Exception("Model does not exist")

            cls.MODELS = loaded_model

        end = time.time()
        log.info("Loaded the models in %d seconds" % (end - start))
Example No. 5
    def test_reading_via_predefined_connection(self):
        s3 = boto.connect_s3(**CREDENTIALS)

        with s3io.open(self.S3_URL, s3_connection=s3) as s3_file:
            contents = s3_file.read()
            self.assertEqual(contents, self.TEST_CONTENTS)
Example No. 6
    def test_writing_via_credentials(self):
        with s3io.open(self.S3_URL, mode='w', **CREDENTIALS) as s3_file:
            s3_file.write(self.TEST_CONTENTS)

        self.assertEqual(get_contents(self.TEST_KEY), self.TEST_CONTENTS)
Example No. 7
        def open_invalid_url():
            INVALID_S3_URL = 's3://something'

            with s3io.open(INVALID_S3_URL, **CREDENTIALS):
                pass
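In the surrounding test this helper is presumably passed to an assertion that expects a failure, along the lines of the sketch below; the exact exception raised by s3io/boto for a malformed URL is not shown in the snippet, so a broad check is used here purely for illustration.

        self.assertRaises(Exception, open_invalid_url)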
Example No. 8
"""Example of usage of Joblib with Amazon S3."""

import s3io
import joblib
import numpy as np

big_obj = [np.ones((500, 500)), np.random.random((1000, 1000))]

# Customize the following values with your own
bucket = "my-bucket"
key = "my_pickle.pkl"
compress = ('gzip', 3)
credentials = dict(
    aws_access_key_id="<Public Key>",
    aws_secret_access_key="Private Key",
)

# Dumping to an S3 file is easy with Joblib
with s3io.open('s3://{0}/{1}'.format(bucket, key), mode='w',
               **credentials) as s3_file:
    joblib.dump(big_obj, s3_file, compress=compress)

with s3io.open('s3://{0}/{1}'.format(bucket, key), mode='r',
               **credentials) as s3_file:
    obj_reloaded = joblib.load(s3_file)

print("Correctly reloaded? {0}".format(
    all(np.allclose(x, y) for x, y in zip(big_obj, obj_reloaded))))
Example No. 9
        def read_not_existent_bucket():
            S3_URL = 's3://{0}/{1}'.format(
                'not_existent_bucket_hjshewighksfdkjffh', 'not_existent_key')

            with s3io.open(S3_URL, **CREDENTIALS):
                pass
Example No. 10
    def test_reading_via_credentials(self):
        with s3io.open(self.S3_URL, **CREDENTIALS) as s3_file:
            contents = s3_file.read()
            self.assertEqual(contents, self.TEST_CONTENTS)
Example No. 11
        def read_not_existent_key():
            S3_URL = 's3://{0}/{1}'.format(BUCKET, 'not_existent_key')
            with s3io.open(S3_URL, **CREDENTIALS):
                pass