Code example #1
0
    def save(self, model_prefix='model'):
        """Serialize the model to disk: architecture as JSON, weights as HDF5.

        Args:
            model_prefix: filename prefix; files are written as
                ``{prefix}-model.json`` and ``{prefix}-model.h5`` so that
                multiple saved models can coexist.
        """
        model_json = self.model.to_json()
        # Fix: the prefix was previously ignored (paths were hardcoded to
        # 'model/model.json'/'model/model.h5'), which clobbered earlier saves
        # and did not match the '{prefix}-model.*' scheme that load() expects.
        with open(DATA_DIR + "model/{}-model.json".format(model_prefix), "w") as json_file:
            json_file.write(model_json)

        self.model.save_weights(DATA_DIR + "model/{}-model.h5".format(model_prefix))
        LOG.info("Saved model to disk")
Code example #2
0
    def fit(self, model_prefix="model-1"):
        """Train the model, checkpointing weights and pickling the history.

        Args:
            model_prefix: prefix for the per-epoch weight checkpoints and the
                history pickle; pass ``None`` to disable both.

        Returns:
            The history object produced by ``self.model.fit`` (so callers can
            inspect loss curves without re-reading the pickle).
        """
        LOG.info("Training......")
        callbacks = []
        if model_prefix is not None:
            # Keras fills in {epoch}/{val_loss}; % only substitutes the prefix.
            # Parentheses make the precedence (% before +) explicit.
            file_path = DATA_DIR + ("model/%s-{epoch:03d}-{val_loss:.4f}.hdf5" % model_prefix)
            callbacks.append(
                ModelCheckpoint(file_path,
                                monitor='val_loss',
                                mode='min',
                                save_weights_only=True,
                                verbose=1))

        MLPModel.start_new_session()
        LOG.info("Session created")

        LOG.info("Starting Training.....")
        history = self.model.fit(self.X_train,
                                 self.Y_train,
                                 epochs=self.n_epochs,
                                 batch_size=self.batch_size,
                                 validation_data=(self.X_valid, self.Y_valid),
                                 callbacks=callbacks)
        LOG.info("Training Completed")

        if model_prefix is not None:
            # Persist the metric history for later analysis/plotting.
            file_path = DATA_DIR + 'model/%s-history.pickle' % model_prefix
            with open(file_path, 'wb') as handle:
                pickle.dump(history.history,
                            handle,
                            protocol=pickle.HIGHEST_PROTOCOL)
            # Fix: this used to log "Training Completed" a second time.
            LOG.info("Saved training history")

        return history
Code example #3
0
    def load(self, model_prefix='latest'):
        """Reconstruct a model from ``{prefix}-model.json`` + ``{prefix}-model.h5``.

        Args:
            model_prefix: filename prefix used when the model was saved.

        Returns:
            The deserialized model with its weights restored.
        """
        # Fix: use a context manager so the JSON file is closed even when
        # read() or deserialization raises (the old open/close pair leaked
        # the handle on error).
        with open(config.DATA_DIR + 'model/{}-model.json'.format(model_prefix), 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        loaded_model.load_weights(config.DATA_DIR + "model/{}-model.h5".format(model_prefix))
        LOG.info("Loaded model from disk")

        return loaded_model
Code example #4
0
 def first_last_k(coords):
     try:
         k = 5
         partial = [coords[0] for i in range(2 * k)]
         num_coords = len(coords)
         if num_coords < 2 * k:
             partial[-num_coords:] = coords
         else:
             partial[:k] = coords[:k]
             partial[-k:] = coords[-k:]
         partial = np.row_stack(partial)
         return np.array(partial).flatten()
     except:
         LOG.debug(type(coords))
Code example #5
0
    def load_data(self):
        """Populate train/validation/test splits, using on-disk caches when present.

        Fast path: when the pickled/NumPy caches exist, everything is read
        back from disk. Slow path: the raw train/test CSVs are cleaned,
        feature-engineered, encoded, labeled and split, then all artifacts
        are written to the caches for the next run.

        Side effects: sets self.X_train/Y_train, X_valid/Y_valid,
        X_test/Y_test and self.metadata.
        """
        # Presence of the training cache is treated as "all caches exist" —
        # NOTE(review): a partially-written cache set would crash below.
        if os.path.isfile(self.train_cache):
            LOG.info("Found cached data")
            train = pd.read_pickle(self.train_cache)
            validation = pd.read_pickle(self.validation_cache)
            test = pd.read_pickle(self.test_cache)

            train_labels = np.load(self.train_labels_cache)
            validation_labels = np.load(self.validation_labels_cache)
            test_labels = np.load(self.test_labels_cache)

            competition_test = pd.read_pickle(self.competition_test_cache)
            with open(self.metadata_cache, 'rb') as handle:
                metadata = pickle.load(handle)
            LOG.info("Data Loaded")
        else:
            LOG.info("Cached data not found....")
            LOG.info("Tranforming data")
            datasets = []
            # Same cleaning pipeline for both CSVs: drop rows flagged as
            # missing or with an empty polyline, drop unused columns, and
            # convert the epoch TIMESTAMP to datetime.
            for kind in ['train', 'test']:
                csv_file = '{0}/trainingdata/{1}.csv'.format(DATA_DIR, kind)
                df = pd.read_csv(csv_file)
                df = df[df['MISSING_DATA'] == False]
                df = df[df['POLYLINE'] != '[]']
                df.drop('MISSING_DATA', axis=1, inplace=True)
                df.drop('DAY_TYPE', axis=1, inplace=True)
                df['TIMESTAMP'] = df['TIMESTAMP'].astype('datetime64[s]')
                # Data.extract_features appears to mutate df in place —
                # TODO confirm against its definition.
                Data.extract_features(df)
                datasets.append(df)

            train, competition_test = datasets

            # Encoders are fit across both frames so test-only categories
            # still get an integer id.
            client_encoder = Data.encode_feature('ORIGIN_CALL', train, competition_test)
            taxi_encoder = Data.encode_feature('TAXI_ID', train, competition_test)
            stand_encoder = Data.encode_feature('ORIGIN_STAND', train, competition_test)

            # Keep the full polyline for labels, then truncate POLYLINE to
            # simulate partial (in-progress) trips as model input.
            train['POLYLINE_FULL'] = train['POLYLINE'].copy()
            train['POLYLINE'] = train['POLYLINE'].apply(Data.random_truncate)
            # Labels = final point of the full trip (columns: x[-1][0], x[-1][1]).
            train_labels = np.column_stack([
                train['POLYLINE_FULL'].apply(lambda x: x[-1][0]),
                train['POLYLINE_FULL'].apply(lambda x: x[-1][1])
            ])
            # Outliers are removed after labeling so rows and labels stay aligned.
            train, train_labels = Data.remove_outliers(train, train_labels)

            # Cardinalities the model needs for its embedding/input layers.
            metadata = {
                'n_quarter_hours': 96,  # Number of quarter of hours in one day (i.e. 24 * 4).
                'n_days_per_week': 7,
                'n_weeks_per_year': 52,
                'n_client_ids': len(client_encoder.classes_),
                'n_taxi_ids': len(taxi_encoder.classes_),
                'n_stand_ids': len(stand_encoder.classes_),
            }

            # Two-stage split: 2% held out, then halved into validation and
            # test (i.e. ~1% each of the original training data).
            train, validation, train_labels, validation_labels = train_test_split(train, train_labels, test_size=0.02)
            validation, test, validation_labels, test_labels = train_test_split(validation, validation_labels,
                                                                                test_size=0.5)

            # Write every artifact so the fast path above can be taken next run.
            train.to_pickle(self.train_cache)
            validation.to_pickle(self.validation_cache)
            test.to_pickle(self.test_cache)
            np.save(self.train_labels_cache, train_labels)
            np.save(self.validation_labels_cache, validation_labels)
            np.save(self.test_labels_cache, test_labels)
            competition_test.to_pickle(self.competition_test_cache)
            with open(self.metadata_cache, 'wb') as handle:
                pickle.dump(metadata, handle, protocol=pickle.HIGHEST_PROTOCOL)

            LOG.info("Data Transformed")

        self.X_train = train
        self.Y_train = train_labels
        self.X_valid = validation
        self.Y_valid = validation_labels
        self.X_test = test
        self.Y_test = test_labels
        self.metadata = metadata