def get_app_trip_df_list(all_features):
    # pd.DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 is the equivalent
    app_features_pt = pd.read_csv('./data/app/unnormalized_pt_features_df.csv', index_col=0)
    app_labels_pt = pd.read_csv('./data/app/unnormalized_pt_labels_df.csv',
                                index_col=0)['pt_label'].tolist()
    with open("./data/app/trip_dict.txt", "rb") as fp:  # unpickle the trip-index dictionary
        trip_dict = pickle.load(fp)

    # normalize the point features, then re-attach labels and raw coordinates
    app_pt_df = normalize(app_features_pt[all_features])
    app_pt_df['pt_label'] = app_labels_pt
    app_pt_df['WLONGITUDE'] = app_features_pt['WLONGITUDE']
    app_pt_df['WLATITUDE'] = app_features_pt['WLATITUDE']

    # build one Trip per key; the -1 key is not an actual trip, so drop it before iterating
    trip_keys = list(trip_dict.keys())
    trip_keys.remove(-1)
    trips_df_list = []
    for cur_trip_key in trip_keys:
        trips_df_list.append(
            Trip(app_pt_df.iloc[trip_dict[cur_trip_key - 1]:trip_dict[cur_trip_key]],
                 cur_trip_key,
                 app_features_pt.iloc[trip_dict[cur_trip_key - 1]]['NID']))
    del app_features_pt, app_labels_pt, app_pt_df, trip_keys
    return trips_df_list
def get_google_trip_df_list(all_features):
    file_loc = "./data/google/"
    all_files = os.listdir(file_loc)
    features_df_list = []
    labels_df_list = []

    # find the highest file index so every unnormalized_pt_features_df_<n>.csv gets read
    max_num = 0
    for file in all_files:
        if 'unnormalized_pt_features_df' in file:
            n = int(file[file.index('df_') + 3:file.index('.csv')])
            if n > max_num:
                max_num = n

    # go through each file and read out the df
    # (pd.DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 is the equivalent)
    for i in range(max_num + 1):
        features_tmp = pd.read_csv(file_loc + 'unnormalized_pt_features_df_' + str(i) + '.csv',
                                   index_col=0)
        labels_tmp = pd.read_csv(file_loc + 'unnormalized_pt_labels_df_' + str(i) + '.csv',
                                 index_col=0)
        features_df_list.append(features_tmp)
        labels_df_list.append(labels_tmp)

    with open("./data/google/trip_dict.txt", "rb") as fp:  # unpickle the trip-index dictionary
        trip_dict = pickle.load(fp)

    # concatenate all dfs
    google_features = pd.concat(features_df_list, ignore_index=True)
    google_labels = pd.concat(labels_df_list, ignore_index=True)

    # remap labels to the shared scheme; use .loc to avoid chained-assignment issues,
    # and keep this order so already-remapped values are not overwritten
    google_labels.loc[google_labels.pt_label == 0, 'pt_label'] = 5  # walk/stationary
    google_labels.loc[google_labels.pt_label == 3, 'pt_label'] = 4  # car
    google_labels.loc[google_labels.pt_label == 2, 'pt_label'] = 3  # bus
    google_labels.loc[google_labels.pt_label == 1, 'pt_label'] = 2  # mrt

    # normalize the point features, then re-attach labels and raw coordinates
    google_pt_df = normalize(google_features[all_features])
    google_pt_df['pt_label'] = google_labels['pt_label']
    google_pt_df['WLONGITUDE'] = google_features['WLONGITUDE']
    google_pt_df['WLATITUDE'] = google_features['WLATITUDE']

    # build one Trip per key; the -1 key is not an actual trip, so drop it before iterating
    trip_keys = list(trip_dict.keys())
    trip_keys.remove(-1)
    trips_list = []
    for cur_trip_key in trip_keys:
        trips_list.append(
            Trip(google_pt_df.iloc[trip_dict[cur_trip_key - 1]:trip_dict[cur_trip_key]],
                 cur_trip_key + 2000,  # offset keeps Google trip ids distinct from app/manual ones
                 google_features.iloc[trip_dict[cur_trip_key - 1]]['NID']))
    del file_loc, all_files, features_df_list, labels_df_list, max_num, \
        google_features, google_labels, google_pt_df, trip_keys
    return trips_list
def enrich_rgb_channels(image_set, norm="uint8"):
    basename = os.path.basename(image_set)
    # back up the original .tif before it is overwritten in place
    shutil.copy(os.path.join(image_set, "{}.tif".format(basename)),
                os.path.join(image_set, "{}_bak.tif".format(basename)))
    normalized = normalization.normalize(
        os.path.join(image_set, "{}.tif".format(basename)), norm)
    # write the normalized image back over the original, with maximum zlib compression
    with skimage.external.tifffile.TiffWriter(
            os.path.join(image_set, "{}.tif".format(basename))) as tif:
        tif.save(normalized, compress=9)
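# Usage sketch (assumption, not part of the original module): enrich_rgb_channels expects
# an image-set directory containing a .tif named after that directory; it backs the file
# up as <name>_bak.tif and overwrites the original with the normalized version. The helper
# below and the directory layout it assumes are hypothetical.
def example_enrich_all_image_sets(image_sets_root):
    """Normalize every image-set directory found directly under image_sets_root."""
    for entry in sorted(os.listdir(image_sets_root)):
        image_set = os.path.join(image_sets_root, entry)
        if os.path.isdir(image_set):
            enrich_rgb_channels(image_set, norm="uint8")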
def detect(self, model_path, images, type="chip"):
    assert type in ["ndom", "chip"]
    if model_path == "last":
        model_path = self.model.find_last()
    self.model.load_weights(model_path, by_name=True)

    results = []
    splits = []
    # nDOM rasters are split into tiles first; chips are passed through unchanged
    if type == "ndom":
        splits.extend(conversion.split_ndom(images[0], True))
    elif type == "chip":
        splits.append(images[0])
    for image in images[1:]:
        if type == "ndom":
            splits.extend(conversion.split_ndom(image))
        elif type == "chip":
            splits.append(image)

    # normalize each split and run the model on it, one image at a time
    for img in splits:
        image_enrich = normalization.normalize(img)
        result = self.model.detect([image_enrich], verbose=1)[0]
        results.append(result)
    return results, splits
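# Usage sketch (assumption, not part of the original class): how detect might be called on
# an instance of the enclosing Mask R-CNN wrapper. The `detector` argument, the pre-cut
# `chips` list, and the 'rois' result key (Matterport Mask R-CNN convention) are
# assumptions rather than guarantees of this repository.
def example_count_detections(detector, chips):
    """Run chip-mode detection with the latest checkpoint and count hits per chip."""
    results, splits = detector.detect("last", chips, type="chip")
    return [len(result.get("rois", [])) for result in results]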
def get_manual_trip_df_list(all_features):
    # pd.DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 is the equivalent
    manual_features_pt = pd.read_csv('./data/manual/unnormalized_pt_features_df.csv', index_col=0)
    manual_labels_pt = pd.read_csv('./data/manual/unnormalized_pt_labels_df.csv', index_col=0)

    # remap labels to the shared scheme; use .loc to avoid chained-assignment issues,
    # and keep this order so already-remapped values are not overwritten
    manual_labels_pt.loc[manual_labels_pt.pt_label == 0, 'pt_label'] = 5  # walk/stationary
    manual_labels_pt.loc[manual_labels_pt.pt_label == 3, 'pt_label'] = 4  # car
    manual_labels_pt.loc[manual_labels_pt.pt_label == 2, 'pt_label'] = 3  # bus
    manual_labels_pt.loc[manual_labels_pt.pt_label == 1, 'pt_label'] = 2  # mrt

    with open("./data/manual/trip_dict.txt", "rb") as fp:  # unpickle the trip-index dictionary
        trip_dict = pickle.load(fp)

    # normalize the point features, then re-attach labels and raw coordinates
    manual_pt_df = normalize(manual_features_pt[all_features])
    manual_pt_df['pt_label'] = manual_labels_pt['pt_label']
    manual_pt_df['WLONGITUDE'] = manual_features_pt['WLONGITUDE']
    manual_pt_df['WLATITUDE'] = manual_features_pt['WLATITUDE']

    # build one Trip per key; the -1 key is not an actual trip, so drop it before iterating
    trip_keys = list(trip_dict.keys())
    trip_keys.remove(-1)
    trips_df_list = []
    for cur_trip_key in trip_keys:
        trips_df_list.append(
            Trip(manual_pt_df.iloc[trip_dict[cur_trip_key - 1][0]:trip_dict[cur_trip_key][0]],
                 cur_trip_key + 1000,  # offset keeps manual trip ids distinct from app ones
                 manual_features_pt.iloc[trip_dict[cur_trip_key - 1][0]]['NID']))
    del manual_features_pt, manual_labels_pt, manual_pt_df, trip_keys
    return trips_df_list
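# Usage sketch (assumption, not part of the original module): the three loaders above
# return Trip objects with non-overlapping ids (app as-is, manual offset by +1000,
# Google by +2000), so they can be pooled into one training list. The feature-name
# list passed in is whatever the caller already uses for `all_features`.
def example_build_combined_trip_list(all_features):
    """Pool app, manual, and Google trips into a single list of Trip objects."""
    trips = []
    trips.extend(get_app_trip_df_list(all_features))
    trips.extend(get_manual_trip_df_list(all_features))
    trips.extend(get_google_trip_df_list(all_features))
    return trips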
def run_experiments(train_file_name, test_file_name, result_file_name,
                    forecast_horizon, past_history_ls, batch_size_ls, epochs_ls,
                    tcn_params=TCN_PARAMS, lstm_params=LSTM_PARAMS,
                    gpu_number=None, metrics_ls=METRICS, buffer_size=1000,
                    seed=1, show_plots=False, webhook=None, validation_size=0.2):
    tf.random.set_seed(seed)
    np.random.seed(seed)

    gpus = tf.config.experimental.list_physical_devices('GPU')
    print(gpus)
    device_name = str(gpus)
    if len(gpus) >= 2 and gpu_number is not None:
        device = gpus[gpu_number]
        tf.config.experimental.set_memory_growth(device, True)
        tf.config.experimental.set_visible_devices(device, 'GPU')
        device_name = str(device)
        print(device)

    # Write result csv header (resume from the last written row if the file already exists)
    current_index = 0
    try:
        with open(result_file_name, 'r') as resfile:
            current_index = sum(1 for line in resfile) - 1
    except IOError:
        pass
    print('CURRENT INDEX', current_index)

    if current_index == 0:
        with open(result_file_name, 'w') as resfile:
            resfile.write(';'.join([
                str(a) for a in
                ['MODEL', 'MODEL_DESCRIPTION', 'FORECAST_HORIZON',
                 'PAST_HISTORY', 'BATCH_SIZE', 'EPOCHS'] + metrics_ls +
                ['val_' + m for m in metrics_ls] +
                ['loss', 'val_loss', 'Execution_time', 'Device']
            ]) + "\n")

    # Read train file (first column of each row, header skipped)
    with open(train_file_name, 'r') as datafile:
        ts_train = datafile.readlines()[1:]  # skip the header
        ts_train = np.asarray([
            np.asarray(l.rstrip().split(',')[0], dtype=np.float32)
            for l in ts_train
        ])
        ts_train = np.reshape(ts_train, (ts_train.shape[0],))

    # Read test data file
    with open(test_file_name, 'r') as datafile:
        ts_test = datafile.readlines()[1:]  # skip the header
        ts_test = np.asarray([
            np.asarray(l.rstrip().split(',')[0], dtype=np.float32)
            for l in ts_test
        ])
        ts_test = np.reshape(ts_test, (ts_test.shape[0],))

    # Train/validation split
    TRAIN_SPLIT = int(ts_train.shape[0] * (1 - validation_size))
    print(ts_train.shape, TRAIN_SPLIT)

    # Normalize training data
    norm_params = normalization.get_normalization_params(ts_train[:TRAIN_SPLIT])
    ts_train = normalization.normalize(ts_train, norm_params)
    # Normalize test data with train params
    ts_test = normalization.normalize(ts_test, norm_params)

    i = 0
    index_1, total_1 = 0, len(
        list(itertools.product(past_history_ls, batch_size_ls, epochs_ls)))
    for past_history, batch_size, epochs in tqdm(
            list(itertools.product(past_history_ls, batch_size_ls, epochs_ls))):
        index_1 += 1
        # Get x and y for training and validation
        x_train, y_train = data_generation.univariate_data(
            ts_train, 0, TRAIN_SPLIT, past_history, forecast_horizon)
        x_val, y_val = data_generation.univariate_data(
            ts_train, TRAIN_SPLIT - past_history, ts_train.shape[0],
            past_history, forecast_horizon)
        print(x_train.shape, y_train.shape, '\n', x_val.shape, y_val.shape)

        # Get x and y for test data
        x_test, y_test = data_generation.univariate_data(
            ts_test, 0, ts_test.shape[0], past_history, forecast_horizon)

        # Convert numpy data to tensorflow dataset
        train_data = tf.data.Dataset.from_tensor_slices(
            (x_train, y_train)).cache().shuffle(buffer_size).batch(batch_size).repeat()
        val_data = tf.data.Dataset.from_tensor_slices(
            (x_val, y_val)).batch(batch_size).repeat() if validation_size > 0 else None
        test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

        # Create models
        model_list = {}
        model_description_list = {}
        if tcn_params is not None:
            model_list = {
                'TCN_{}'.format(j):
                (tcn, [x_train.shape, forecast_horizon, 'adam', 'mae', *params])
                for j, params in enumerate(itertools.product(*tcn_params.values()))
                if params[1] * params[2] * params[3][-1] == past_history
            }
            model_description_list = {
                'TCN_{}'.format(j): str(dict(zip(tcn_params.keys(), params)))
                for j, params in enumerate(itertools.product(*tcn_params.values()))
                if params[1] * params[2] * params[3][-1] == past_history
            }
        if lstm_params is not None:
            model_list = {
                **model_list,
                **{
                    'LSTM_{}'.format(j):
                    (lstm, [x_train.shape, forecast_horizon, 'adam', 'mae', *params])
                    for j, params in enumerate(itertools.product(*lstm_params.values()))
                }
            }
            model_description_list = {
                **model_description_list,
                **{
                    'LSTM_{}'.format(j): str(dict(zip(lstm_params.keys(), params)))
                    for j, params in enumerate(itertools.product(*lstm_params.values()))
                }
            }

        steps_per_epoch = int(np.ceil(x_train.shape[0] / batch_size))
        validation_steps = steps_per_epoch if val_data else None

        index_2, total_2 = 0, len(model_list.keys())
        for model_name, (model_function, params) in tqdm(model_list.items(), position=1):
            index_2 += 1
            i += 1
            if i <= current_index:
                continue
            start = time.time()
            model = model_function(*params)
            print(model.summary())

            # Train the model
            history = model.fit(train_data,
                                epochs=epochs,
                                steps_per_epoch=steps_per_epoch,
                                validation_data=val_data,
                                validation_steps=validation_steps)

            # Plot training and evaluation loss evolution
            if show_plots:
                auxiliary_plots.plot_training_history(history, ['loss'])

            # Get validation results
            val_metrics = {}
            if validation_size > 0:
                val_forecast = model.predict(x_val)
                val_forecast = normalization.denormalize(val_forecast, norm_params)
                y_val_denormalized = normalization.denormalize(y_val, norm_params)
                val_metrics = metrics.evaluate(y_val_denormalized, val_forecast, metrics_ls)
                print('Validation metrics', val_metrics)

            # TEST: predict with test data and get results
            test_forecast = model.predict(test_data)
            test_forecast = normalization.denormalize(test_forecast, norm_params)
            y_test_denormalized = normalization.denormalize(y_test, norm_params)
            x_test_denormalized = normalization.denormalize(x_test, norm_params)

            test_metrics = metrics.evaluate(y_test_denormalized, test_forecast, metrics_ls)
            print('Test scores', test_metrics)

            # Plot some test predictions
            if show_plots:
                auxiliary_plots.plot_ts_forecasts(x_test_denormalized,
                                                  y_test_denormalized, test_forecast)

            # Save results
            val_metrics = {'val_' + k: val_metrics[k] for k in val_metrics}
            model_metric = {
                'MODEL': model_name,
                'MODEL_DESCRIPTION': model_description_list[model_name],
                'FORECAST_HORIZON': forecast_horizon,
                'PAST_HISTORY': past_history,
                'BATCH_SIZE': batch_size,
                'EPOCHS': epochs,
                **test_metrics,
                **val_metrics,
                **history.history,
                'Execution_time': time.time() - start,
                'Device': device_name
            }

            notify_slack(
                'Progress: {0}/{1} ({2}/{3}) \nMetrics:{4}'.format(
                    index_1, total_1, index_2, total_2,
                    str({
                        'Model': model_name,
                        'WAPE': str(test_metrics['wape']),
                        'Execution_time': "{0:.2f} seconds".format(time.time() - start)
                    })),
                webhook=webhook)

            with open(result_file_name, 'a') as resfile:
                resfile.write(';'.join([str(a) for a in model_metric.values()]) + "\n")
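# Usage sketch (assumption, not part of the original module): a minimal invocation of
# run_experiments. The file names, horizon, and search grids below are illustrative
# placeholders only; tcn_params/lstm_params and metrics fall back to the module-level
# TCN_PARAMS, LSTM_PARAMS and METRICS defaults referenced in the signature above.
if __name__ == "__main__":
    run_experiments(
        train_file_name="train.csv",
        test_file_name="test.csv",
        result_file_name="results.csv",
        forecast_horizon=24,        # predict 24 steps ahead
        past_history_ls=[48, 96],   # input window lengths to try
        batch_size_ls=[32],
        epochs_ls=[5],
        show_plots=False,
    )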