def get_app_trip_df_list(all_features):
    # pd.DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 is the equivalent
    app_features_pt = pd.read_csv('./data/app/unnormalized_pt_features_df.csv', index_col=0)
    app_labels_pt = pd.read_csv('./data/app/unnormalized_pt_labels_df.csv',
                                index_col=0)['pt_label'].tolist()
    with open("./data/app/trip_dict.txt", "rb") as fp:  # unpickle the trip-index dictionary
        trip_dict = pickle.load(fp)

    # normalize the point features, then re-attach labels and raw coordinates
    app_pt_df = normalize(app_features_pt[all_features])
    app_pt_df['pt_label'] = app_labels_pt
    app_pt_df['WLONGITUDE'] = app_features_pt['WLONGITUDE']
    app_pt_df['WLATITUDE'] = app_features_pt['WLATITUDE']

    # build one Trip per key; the -1 key is not an actual trip, so drop it before iterating
    trip_keys = list(trip_dict.keys())
    trip_keys.remove(-1)
    trips_df_list = []
    for cur_trip_key in trip_keys:
        trips_df_list.append(
            Trip(app_pt_df.iloc[trip_dict[cur_trip_key - 1]:trip_dict[cur_trip_key]],
                 cur_trip_key,
                 app_features_pt.iloc[trip_dict[cur_trip_key - 1]]['NID']))
    del app_features_pt, app_labels_pt, app_pt_df, trip_keys
    return trips_df_list
def get_google_trip_df_list(all_features):
    file_loc = "./data/google/"
    all_files = os.listdir(file_loc)
    features_df_list = []
    labels_df_list = []

    # find the highest file index so every unnormalized_pt_features_df_<n>.csv gets read
    max_num = 0
    for file in all_files:
        if 'unnormalized_pt_features_df' in file:
            n = int(file[file.index('df_') + 3:file.index('.csv')])
            if n > max_num:
                max_num = n

    # go through each file and read out the df
    # (pd.DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 is the equivalent)
    for i in range(max_num + 1):
        features_tmp = pd.read_csv(file_loc + 'unnormalized_pt_features_df_' + str(i) + '.csv',
                                   index_col=0)
        labels_tmp = pd.read_csv(file_loc + 'unnormalized_pt_labels_df_' + str(i) + '.csv',
                                 index_col=0)
        features_df_list.append(features_tmp)
        labels_df_list.append(labels_tmp)

    with open("./data/google/trip_dict.txt", "rb") as fp:  # unpickle the trip-index dictionary
        trip_dict = pickle.load(fp)

    # concatenate all dfs
    google_features = pd.concat(features_df_list, ignore_index=True)
    google_labels = pd.concat(labels_df_list, ignore_index=True)

    # remap labels to the shared scheme; use .loc to avoid chained-assignment issues,
    # and keep this order so already-remapped values are not overwritten
    google_labels.loc[google_labels.pt_label == 0, 'pt_label'] = 5  # walk/stationary
    google_labels.loc[google_labels.pt_label == 3, 'pt_label'] = 4  # car
    google_labels.loc[google_labels.pt_label == 2, 'pt_label'] = 3  # bus
    google_labels.loc[google_labels.pt_label == 1, 'pt_label'] = 2  # mrt

    # normalize the point features, then re-attach labels and raw coordinates
    google_pt_df = normalize(google_features[all_features])
    google_pt_df['pt_label'] = google_labels['pt_label']
    google_pt_df['WLONGITUDE'] = google_features['WLONGITUDE']
    google_pt_df['WLATITUDE'] = google_features['WLATITUDE']

    # build one Trip per key; the -1 key is not an actual trip, so drop it before iterating
    trip_keys = list(trip_dict.keys())
    trip_keys.remove(-1)
    trips_list = []
    for cur_trip_key in trip_keys:
        trips_list.append(
            Trip(google_pt_df.iloc[trip_dict[cur_trip_key - 1]:trip_dict[cur_trip_key]],
                 cur_trip_key + 2000,  # offset keeps Google trip ids distinct from app/manual ones
                 google_features.iloc[trip_dict[cur_trip_key - 1]]['NID']))
    del file_loc, all_files, features_df_list, labels_df_list, max_num, \
        google_features, google_labels, google_pt_df, trip_keys
    return trips_list
def enrich_rgb_channels(image_set, norm="uint8"):
    basename = os.path.basename(image_set)
    # back up the original .tif before it is overwritten in place
    shutil.copy(os.path.join(image_set, "{}.tif".format(basename)),
                os.path.join(image_set, "{}_bak.tif".format(basename)))
    normalized = normalization.normalize(
        os.path.join(image_set, "{}.tif".format(basename)), norm)
    # write the normalized image back over the original, with maximum zlib compression
    with skimage.external.tifffile.TiffWriter(
            os.path.join(image_set, "{}.tif".format(basename))) as tif:
        tif.save(normalized, compress=9)
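# Usage sketch (assumption, not part of the original module): enrich_rgb_channels expects
# an image-set directory containing a .tif named after that directory; it backs the file
# up as <name>_bak.tif and overwrites the original with the normalized version. The helper
# below and the directory layout it assumes are hypothetical.
def example_enrich_all_image_sets(image_sets_root):
    """Normalize every image-set directory found directly under image_sets_root."""
    for entry in sorted(os.listdir(image_sets_root)):
        image_set = os.path.join(image_sets_root, entry)
        if os.path.isdir(image_set):
            enrich_rgb_channels(image_set, norm="uint8")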
def detect(self, model_path, images, type="chip"):
    assert type in ["ndom", "chip"]
    if model_path == "last":
        model_path = self.model.find_last()
    self.model.load_weights(model_path, by_name=True)

    results = []
    splits = []
    # nDOM rasters are split into tiles first; chips are passed through unchanged
    if type == "ndom":
        splits.extend(conversion.split_ndom(images[0], True))
    elif type == "chip":
        splits.append(images[0])
    for image in images[1:]:
        if type == "ndom":
            splits.extend(conversion.split_ndom(image))
        elif type == "chip":
            splits.append(image)

    # normalize each split and run the model on it, one image at a time
    for img in splits:
        image_enrich = normalization.normalize(img)
        result = self.model.detect([image_enrich], verbose=1)[0]
        results.append(result)
    return results, splits
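# Usage sketch (assumption, not part of the original class): how detect might be called on
# an instance of the enclosing Mask R-CNN wrapper. The `detector` argument, the pre-cut
# `chips` list, and the 'rois' result key (Matterport Mask R-CNN convention) are
# assumptions rather than guarantees of this repository.
def example_count_detections(detector, chips):
    """Run chip-mode detection with the latest checkpoint and count hits per chip."""
    results, splits = detector.detect("last", chips, type="chip")
    return [len(result.get("rois", [])) for result in results]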
def get_manual_trip_df_list(all_features):
    # pd.DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 is the equivalent
    manual_features_pt = pd.read_csv('./data/manual/unnormalized_pt_features_df.csv', index_col=0)
    manual_labels_pt = pd.read_csv('./data/manual/unnormalized_pt_labels_df.csv', index_col=0)

    # remap labels to the shared scheme; use .loc to avoid chained-assignment issues,
    # and keep this order so already-remapped values are not overwritten
    manual_labels_pt.loc[manual_labels_pt.pt_label == 0, 'pt_label'] = 5  # walk/stationary
    manual_labels_pt.loc[manual_labels_pt.pt_label == 3, 'pt_label'] = 4  # car
    manual_labels_pt.loc[manual_labels_pt.pt_label == 2, 'pt_label'] = 3  # bus
    manual_labels_pt.loc[manual_labels_pt.pt_label == 1, 'pt_label'] = 2  # mrt

    with open("./data/manual/trip_dict.txt", "rb") as fp:  # unpickle the trip-index dictionary
        trip_dict = pickle.load(fp)

    # normalize the point features, then re-attach labels and raw coordinates
    manual_pt_df = normalize(manual_features_pt[all_features])
    manual_pt_df['pt_label'] = manual_labels_pt['pt_label']
    manual_pt_df['WLONGITUDE'] = manual_features_pt['WLONGITUDE']
    manual_pt_df['WLATITUDE'] = manual_features_pt['WLATITUDE']

    # build one Trip per key; the -1 key is not an actual trip, so drop it before iterating
    trip_keys = list(trip_dict.keys())
    trip_keys.remove(-1)
    trips_df_list = []
    for cur_trip_key in trip_keys:
        trips_df_list.append(
            Trip(manual_pt_df.iloc[trip_dict[cur_trip_key - 1][0]:trip_dict[cur_trip_key][0]],
                 cur_trip_key + 1000,  # offset keeps manual trip ids distinct from app ones
                 manual_features_pt.iloc[trip_dict[cur_trip_key - 1][0]]['NID']))
    del manual_features_pt, manual_labels_pt, manual_pt_df, trip_keys
    return trips_df_list
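# Usage sketch (assumption, not part of the original module): the three loaders above
# return Trip objects with non-overlapping ids (app as-is, manual offset by +1000,
# Google by +2000), so they can be pooled into one training list. The feature-name
# list passed in is whatever the caller already uses for `all_features`.
def example_build_combined_trip_list(all_features):
    """Pool app, manual, and Google trips into a single list of Trip objects."""
    trips = []
    trips.extend(get_app_trip_df_list(all_features))
    trips.extend(get_manual_trip_df_list(all_features))
    trips.extend(get_google_trip_df_list(all_features))
    return trips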
def run_experiments(train_file_name, test_file_name, result_file_name,
                    forecast_horizon, past_history_ls, batch_size_ls, epochs_ls,
                    tcn_params=TCN_PARAMS, lstm_params=LSTM_PARAMS,
                    gpu_number=None, metrics_ls=METRICS, buffer_size=1000,
                    seed=1, show_plots=False, webhook=None, validation_size=0.2):
    tf.random.set_seed(seed)
    np.random.seed(seed)

    gpus = tf.config.experimental.list_physical_devices('GPU')
    print(gpus)
    device_name = str(gpus)
    if len(gpus) >= 2 and gpu_number is not None:
        device = gpus[gpu_number]
        tf.config.experimental.set_memory_growth(device, True)
        tf.config.experimental.set_visible_devices(device, 'GPU')
        device_name = str(device)
        print(device)

    # Write result csv header (resume from the last written row if the file already exists)
    current_index = 0
    try:
        with open(result_file_name, 'r') as resfile:
            current_index = sum(1 for line in resfile) - 1
    except IOError:
        pass
    print('CURRENT INDEX', current_index)

    if current_index == 0:
        with open(result_file_name, 'w') as resfile:
            resfile.write(';'.join([
                str(a) for a in
                ['MODEL', 'MODEL_DESCRIPTION', 'FORECAST_HORIZON',
                 'PAST_HISTORY', 'BATCH_SIZE', 'EPOCHS'] + metrics_ls +
                ['val_' + m for m in metrics_ls] +
                ['loss', 'val_loss', 'Execution_time', 'Device']
            ]) + "\n")

    # Read train file (first column of each row, header skipped)
    with open(train_file_name, 'r') as datafile:
        ts_train = datafile.readlines()[1:]  # skip the header
        ts_train = np.asarray([
            np.asarray(l.rstrip().split(',')[0], dtype=np.float32)
            for l in ts_train
        ])
        ts_train = np.reshape(ts_train, (ts_train.shape[0],))

    # Read test data file
    with open(test_file_name, 'r') as datafile:
        ts_test = datafile.readlines()[1:]  # skip the header
        ts_test = np.asarray([
            np.asarray(l.rstrip().split(',')[0], dtype=np.float32)
            for l in ts_test
        ])
        ts_test = np.reshape(ts_test, (ts_test.shape[0],))

    # Train/validation split
    TRAIN_SPLIT = int(ts_train.shape[0] * (1 - validation_size))
    print(ts_train.shape, TRAIN_SPLIT)

    # Normalize training data
    norm_params = normalization.get_normalization_params(ts_train[:TRAIN_SPLIT])
    ts_train = normalization.normalize(ts_train, norm_params)
    # Normalize test data with train params
    ts_test = normalization.normalize(ts_test, norm_params)

    i = 0
    index_1, total_1 = 0, len(
        list(itertools.product(past_history_ls, batch_size_ls, epochs_ls)))
    for past_history, batch_size, epochs in tqdm(
            list(itertools.product(past_history_ls, batch_size_ls, epochs_ls))):
        index_1 += 1
        # Get x and y for training and validation
        x_train, y_train = data_generation.univariate_data(
            ts_train, 0, TRAIN_SPLIT, past_history, forecast_horizon)
        x_val, y_val = data_generation.univariate_data(
            ts_train, TRAIN_SPLIT - past_history, ts_train.shape[0],
            past_history, forecast_horizon)
        print(x_train.shape, y_train.shape, '\n', x_val.shape, y_val.shape)

        # Get x and y for test data
        x_test, y_test = data_generation.univariate_data(
            ts_test, 0, ts_test.shape[0], past_history, forecast_horizon)

        # Convert numpy data to tensorflow dataset
        train_data = tf.data.Dataset.from_tensor_slices(
            (x_train, y_train)).cache().shuffle(buffer_size).batch(batch_size).repeat()
        val_data = tf.data.Dataset.from_tensor_slices(
            (x_val, y_val)).batch(batch_size).repeat() if validation_size > 0 else None
        test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

        # Create models
        model_list = {}
        model_description_list = {}
        if tcn_params is not None:
            model_list = {
                'TCN_{}'.format(j):
                (tcn, [x_train.shape, forecast_horizon, 'adam', 'mae', *params])
                for j, params in enumerate(itertools.product(*tcn_params.values()))
                if params[1] * params[2] * params[3][-1] == past_history
            }
            model_description_list = {
                'TCN_{}'.format(j): str(dict(zip(tcn_params.keys(), params)))
                for j, params in enumerate(itertools.product(*tcn_params.values()))
                if params[1] * params[2] * params[3][-1] == past_history
            }
        if lstm_params is not None:
            model_list = {
                **model_list,
                **{
                    'LSTM_{}'.format(j):
                    (lstm, [x_train.shape, forecast_horizon, 'adam', 'mae', *params])
                    for j, params in enumerate(itertools.product(*lstm_params.values()))
                }
            }
            model_description_list = {
                **model_description_list,
                **{
                    'LSTM_{}'.format(j): str(dict(zip(lstm_params.keys(), params)))
                    for j, params in enumerate(itertools.product(*lstm_params.values()))
                }
            }

        steps_per_epoch = int(np.ceil(x_train.shape[0] / batch_size))
        validation_steps = steps_per_epoch if val_data else None

        index_2, total_2 = 0, len(model_list.keys())
        for model_name, (model_function, params) in tqdm(model_list.items(), position=1):
            index_2 += 1
            i += 1
            if i <= current_index:
                continue
            start = time.time()
            model = model_function(*params)
            print(model.summary())

            # Train the model
            history = model.fit(train_data,
                                epochs=epochs,
                                steps_per_epoch=steps_per_epoch,
                                validation_data=val_data,
                                validation_steps=validation_steps)

            # Plot training and evaluation loss evolution
            if show_plots:
                auxiliary_plots.plot_training_history(history, ['loss'])

            # Get validation results
            val_metrics = {}
            if validation_size > 0:
                val_forecast = model.predict(x_val)
                val_forecast = normalization.denormalize(val_forecast, norm_params)
                y_val_denormalized = normalization.denormalize(y_val, norm_params)
                val_metrics = metrics.evaluate(y_val_denormalized, val_forecast, metrics_ls)
                print('Validation metrics', val_metrics)

            # TEST: predict with test data and get results
            test_forecast = model.predict(test_data)
            test_forecast = normalization.denormalize(test_forecast, norm_params)
            y_test_denormalized = normalization.denormalize(y_test, norm_params)
            x_test_denormalized = normalization.denormalize(x_test, norm_params)

            test_metrics = metrics.evaluate(y_test_denormalized, test_forecast, metrics_ls)
            print('Test scores', test_metrics)

            # Plot some test predictions
            if show_plots:
                auxiliary_plots.plot_ts_forecasts(x_test_denormalized,
                                                  y_test_denormalized, test_forecast)

            # Save results
            val_metrics = {'val_' + k: val_metrics[k] for k in val_metrics}
            model_metric = {
                'MODEL': model_name,
                'MODEL_DESCRIPTION': model_description_list[model_name],
                'FORECAST_HORIZON': forecast_horizon,
                'PAST_HISTORY': past_history,
                'BATCH_SIZE': batch_size,
                'EPOCHS': epochs,
                **test_metrics,
                **val_metrics,
                **history.history,
                'Execution_time': time.time() - start,
                'Device': device_name
            }

            notify_slack(
                'Progress: {0}/{1} ({2}/{3}) \nMetrics:{4}'.format(
                    index_1, total_1, index_2, total_2,
                    str({
                        'Model': model_name,
                        'WAPE': str(test_metrics['wape']),
                        'Execution_time': "{0:.2f} seconds".format(time.time() - start)
                    })),
                webhook=webhook)

            with open(result_file_name, 'a') as resfile:
                resfile.write(';'.join([str(a) for a in model_metric.values()]) + "\n")
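# Usage sketch (assumption, not part of the original module): a minimal invocation of
# run_experiments. The file names, horizon, and search grids below are illustrative
# placeholders only; tcn_params/lstm_params and metrics fall back to the module-level
# TCN_PARAMS, LSTM_PARAMS and METRICS defaults referenced in the signature above.
if __name__ == "__main__":
    run_experiments(
        train_file_name="train.csv",
        test_file_name="test.csv",
        result_file_name="results.csv",
        forecast_horizon=24,        # predict 24 steps ahead
        past_history_ls=[48, 96],   # input window lengths to try
        batch_size_ls=[32],
        epochs_ls=[5],
        show_plots=False,
    )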