def save(self, model_prefix='model'):
    # Serialize the architecture to JSON and the weights to HDF5, using the
    # same "{prefix}-model.*" naming that load() expects.
    model_json = self.model.to_json()
    with open(DATA_DIR + "model/{}-model.json".format(model_prefix), "w") as json_file:
        json_file.write(model_json)
    self.model.save_weights(DATA_DIR + "model/{}-model.h5".format(model_prefix))
    LOG.info("Saved model to disk")
def fit(self, model_prefix="model-1"): LOG.info("Training......") callbacks = [] if model_prefix is not None: file_path = DATA_DIR + "model/%s-{epoch:03d}-{val_loss:.4f}.hdf5" % model_prefix callbacks.append( ModelCheckpoint(file_path, monitor='val_loss', mode='min', save_weights_only=True, verbose=1)) MLPModel.start_new_session() LOG.info("Session created") LOG.info("Starting Training.....") history = self.model.fit(self.X_train, self.Y_train, epochs=self.n_epochs, batch_size=self.batch_size, validation_data=(self.X_valid, self.Y_valid), callbacks=callbacks) LOG.info("Training Completed") if model_prefix is not None: file_path = DATA_DIR + 'model/%s-history.pickle' % model_prefix with open(file_path, 'wb') as handle: pickle.dump(history.history, handle, protocol=pickle.HIGHEST_PROTOCOL) LOG.info("Training Completed")
def load(self, model_prefix='latest'):
    # Rebuild the architecture from JSON, then restore the matching weights.
    with open(DATA_DIR + 'model/{}-model.json'.format(model_prefix), 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(DATA_DIR + "model/{}-model.h5".format(model_prefix))
    LOG.info("Loaded model from disk")
    return loaded_model
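A minimal round trip with these two methods might look like the sketch below. The compile arguments are placeholders and an assumption on my part: model_from_json only restores the architecture, so the returned model has to be compiled again before evaluation or further training.

# Hypothetical usage sketch; assumes `model` is an MLPModel whose Keras model is built and trained.
model.save(model_prefix='latest')                 # writes model/latest-model.json and model/latest-model.h5
restored = model.load(model_prefix='latest')      # architecture + weights, but no compile state
restored.compile(optimizer='adam', loss='mse')    # placeholder optimizer/loss, not taken from this code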
def first_last_k(coords):
    # Reduce a variable-length trajectory to a fixed-length vector built from
    # its first k and last k points.
    try:
        k = 5
        # Pre-fill with the first coordinate so short trips are padded at the front.
        partial = [coords[0] for _ in range(2 * k)]
        num_coords = len(coords)
        if num_coords < 2 * k:
            partial[-num_coords:] = coords
        else:
            partial[:k] = coords[:k]
            partial[-k:] = coords[-k:]
        # Stack into a (2k, 2) array of lon/lat pairs and flatten to 4k values.
        partial = np.row_stack(partial)
        return partial.flatten()
    except (IndexError, TypeError):
        LOG.debug(type(coords))
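As a rough illustration (the coordinates below are made up, and this assumes first_last_k is callable as a plain helper), every polyline is mapped to a fixed 20-value vector: the first k and last k lon/lat pairs, front-padded with the first point when the trip has fewer than 2k points.

# Made-up Porto-like coordinates, purely for illustration.
short_trip = [[-8.61, 41.14], [-8.62, 41.15], [-8.63, 41.16]]            # 3 GPS points
long_trip = [[-8.61 + 0.001 * i, 41.14 + 0.001 * i] for i in range(30)]  # 30 GPS points

print(first_last_k(short_trip).shape)  # (20,) - front-padded with the first point
print(first_last_k(long_trip).shape)   # (20,) - first 5 and last 5 points kept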
def load_data(self):
    if os.path.isfile(self.train_cache):
        LOG.info("Found cached data")
        train = pd.read_pickle(self.train_cache)
        validation = pd.read_pickle(self.validation_cache)
        test = pd.read_pickle(self.test_cache)
        train_labels = np.load(self.train_labels_cache)
        validation_labels = np.load(self.validation_labels_cache)
        test_labels = np.load(self.test_labels_cache)
        competition_test = pd.read_pickle(self.competition_test_cache)
        with open(self.metadata_cache, 'rb') as handle:
            metadata = pickle.load(handle)
        LOG.info("Data loaded")
    else:
        LOG.info("Cached data not found")
        LOG.info("Transforming data")
        datasets = []
        for kind in ['train', 'test']:
            csv_file = '{0}/trainingdata/{1}.csv'.format(DATA_DIR, kind)
            df = pd.read_csv(csv_file)
            # Drop trips with missing GPS data or an empty polyline.
            df = df[df['MISSING_DATA'] == False]
            df = df[df['POLYLINE'] != '[]']
            df.drop('MISSING_DATA', axis=1, inplace=True)
            df.drop('DAY_TYPE', axis=1, inplace=True)
            df['TIMESTAMP'] = df['TIMESTAMP'].astype('datetime64[s]')
            Data.extract_features(df)
            datasets.append(df)
        train, competition_test = datasets

        # Fit label encoders over both the training and competition test data.
        client_encoder = Data.encode_feature('ORIGIN_CALL', train, competition_test)
        taxi_encoder = Data.encode_feature('TAXI_ID', train, competition_test)
        stand_encoder = Data.encode_feature('ORIGIN_STAND', train, competition_test)

        # Keep the full polyline for the label; train on a random truncation of it.
        train['POLYLINE_FULL'] = train['POLYLINE'].copy()
        train['POLYLINE'] = train['POLYLINE'].apply(Data.random_truncate)

        # The target is the final (longitude, latitude) point of each trip.
        train_labels = np.column_stack([
            train['POLYLINE_FULL'].apply(lambda x: x[-1][0]),
            train['POLYLINE_FULL'].apply(lambda x: x[-1][1])
        ])
        train, train_labels = Data.remove_outliers(train, train_labels)

        metadata = {
            'n_quarter_hours': 96,  # Number of quarter-hours in one day (24 * 4).
            'n_days_per_week': 7,
            'n_weeks_per_year': 52,
            'n_client_ids': len(client_encoder.classes_),
            'n_taxi_ids': len(taxi_encoder.classes_),
            'n_stand_ids': len(stand_encoder.classes_),
        }

        # Hold out 2% of the data, then split it half/half: 98% train, 1% validation, 1% test.
        train, validation, train_labels, validation_labels = train_test_split(
            train, train_labels, test_size=0.02)
        validation, test, validation_labels, test_labels = train_test_split(
            validation, validation_labels, test_size=0.5)

        # Cache everything so the transformation only runs once.
        train.to_pickle(self.train_cache)
        validation.to_pickle(self.validation_cache)
        test.to_pickle(self.test_cache)
        np.save(self.train_labels_cache, train_labels)
        np.save(self.validation_labels_cache, validation_labels)
        np.save(self.test_labels_cache, test_labels)
        competition_test.to_pickle(self.competition_test_cache)
        with open(self.metadata_cache, 'wb') as handle:
            pickle.dump(metadata, handle, protocol=pickle.HIGHEST_PROTOCOL)
        LOG.info("Data transformed")

    self.X_train = train
    self.Y_train = train_labels
    self.X_valid = validation
    self.Y_valid = validation_labels
    self.X_test = test
    self.Y_test = test_labels
    self.metadata = metadata
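Tying the pieces together, one possible training run is sketched below. The constructor and the assumption that a single object exposes both the data pipeline and the model are mine (the excerpts above only show the methods), so treat this as a wiring example rather than the project's actual entry point.

# Hypothetical wiring; constructor signature is an assumption.
model = MLPModel()
model.load_data()                   # first run transforms the CSVs, later runs hit the cache
model.fit(model_prefix="model-1")   # per-epoch checkpoints land in model/model-1-*.hdf5
model.save(model_prefix="model-1")  # writes model/model-1-model.json and model/model-1-model.h5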