def report_poorly_performing_classification_indices(network,
                                                    data,
                                                    flags,
                                                    n_report=4):
    unshuffled = dataset_loaders.risk_dataset_loader(
        flags.dataset_filepath,
        shuffle=False,
        train_split=1.,
        debug_size=flags.debug_size,
        timesteps=flags.timesteps,
        num_target_bins=flags.num_target_bins,
        balanced_class_loss=flags.balanced_class_loss,
        target_index=flags.target_index,
        load_likelihood_weights=flags.use_likelihood_weights)
    x, y_true = unshuffled['x_train'], unshuffled['y_train']
    y_pred, y_probs = network.predict(x, predict_labels=True)
    # when per-class probabilities are returned with shape
    # (samples, targets, classes), keep the probability of class 1
    if len(y_probs.shape) == 3:
        cur_probs = y_probs[:, :, 1]
    else:
        cur_probs = y_probs
    ce = cross_entropy_loss(y_true, cur_probs)

    for tidx in range(flags.output_dim):
        print(TARGET_LABELS[tidx])
        # rank examples by descending cross entropy and report the worst ones
        idxs = list(reversed(np.argsort(ce[:, tidx])))[:n_report]
        report_poorly_performing_indices(idxs, unshuffled)
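For reference, a minimal sketch of the elementwise binary cross entropy the ranking above relies on: cross_entropy_loss is assumed to return one value per (sample, target) pair so that np.argsort can order the worst-performing examples. This is an illustrative stand-in, not necessarily the project's implementation.

import numpy as np

# hypothetical stand-in for the cross_entropy_loss used above
def cross_entropy_loss(y_true, y_probs, eps=1e-8):
    # clip probabilities away from 0 and 1 so the logs stay finite
    p = np.clip(y_probs, eps, 1 - eps)
    # per-sample, per-target binary cross entropy, shape (n_samples, n_targets)
    return -(y_true * np.log(p) + (1 - y_true) * np.log(1 - p))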
Example #2
def evaluate_regression_fit(network, data, flags):
    # final train loss
    y_pred = network.predict(data['x_train'])
    y = data['y_train']
    y_null = np.mean(y, axis=0)
    regression_score(y, y_pred, 'training', y_null=y_null)

    # final validation loss
    y_pred = network.predict(data['x_val'])
    y = data['y_val']
    y_null = np.mean(y, axis=0)
    regression_score(y, y_pred, 'validation', y_null=y_null)

    # final validation loss on the hard brake target (index 3)
    y_pred = network.predict(data['x_val'])
    y_pred = y_pred[:, 3]
    y = data['y_val'][:, 3]
    y_null = np.mean(y, axis=0)
    regression_score(y, y_pred, 'hard brake', y_null=y_null)

    # score on the full, unshuffled dataset
    data = dataset_loaders.risk_dataset_loader(flags.dataset_filepath,
                                               shuffle=False,
                                               train_split=1.,
                                               debug_size=flags.debug_size,
                                               timesteps=flags.timesteps)
    y_pred = network.predict(data['x_train'])
    regression_score(data['y_train'], y_pred, 'unshuffled', data)
def main(argv=None):
    # custom parse of flags for list input
    compression_flags.custom_parse_flags(FLAGS)

    # set random seeds
    np.random.seed(FLAGS.random_seed)
    tf.set_random_seed(FLAGS.random_seed)

    # load dataset
    input_filepath = FLAGS.dataset_filepath
    data = dataset_loaders.risk_dataset_loader(
        input_filepath,
        shuffle=True,
        train_split=.9,
        debug_size=FLAGS.debug_size,
        timesteps=FLAGS.timesteps,
        num_target_bins=FLAGS.num_target_bins,
        balanced_class_loss=FLAGS.balanced_class_loss,
        target_index=FLAGS.target_index)

    if FLAGS.use_priority:
        d = priority_dataset.PrioritizedDataset(data, FLAGS)
    else:
        if FLAGS.balanced_class_loss:
            d = dataset.WeightedDataset(data, FLAGS)
        else:
            d = dataset.Dataset(data, FLAGS)

    print('means:\n{}\n{}'.format(np.mean(d.data['y_train'], axis=0),
                                  np.mean(d.data['y_val'], axis=0)))
    y = copy.deepcopy(d.data['y_val'])
    y[y == 0.] = 1e-8
    y[y == 1.] = 1 - 1e-8
    compression_metrics.regression_score(y, np.mean(y, axis=0), 'baseline')
    compression_metrics.regression_score(y, y, 'correct')

    # fit the model
    with tf.Session(config=tf.ConfigProto(
            log_device_placement=False)) as session:
        # if the timestep dimension is > 1, use recurrent network
        if FLAGS.timesteps > 1:
            network = rnn.RecurrentNeuralNetwork(session, FLAGS)
        else:
            if FLAGS.task_type == 'classification':
                if FLAGS.balanced_class_loss:
                    network = ffnn.WeightedClassificationFeedForwardNeuralNetwork(
                        session, FLAGS)
                else:
                    network = ffnn.ClassificationFeedForwardNeuralNetwork(
                        session, FLAGS)
            else:
                network = ffnn.FeedForwardNeuralNetwork(session, FLAGS)
        network.fit(d)

        # save weights to a julia-compatible weight file
        neural_networks.utils.save_trainable_variables(
            FLAGS.julia_weights_filepath, session, data)

        # evaluate the fit
        compression_metrics.evaluate_fit(network, data, FLAGS)
Example #4
def run_pca():
    # constants
    input_filepath = '../../data/datasets/2_19/risk_10_sec_10_timesteps.h5'
    debug_size = 100000
    timesteps = 1
    target_index = None
    n_components = 2
    batch_size = 1000

    # load data
    data = dataset_loaders.risk_dataset_loader(input_filepath,
                                               shuffle=False,
                                               train_split=1.,
                                               debug_size=debug_size,
                                               timesteps=timesteps,
                                               target_index=target_index)
    features = data['x_train']
    targets = data['y_train']
    idxs = np.where(np.sum(targets, axis=1) > 0)[0]
    features = features[idxs]
    targets = targets[idxs]

    # run pca
    # pca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
    pca = PCA(n_components=n_components)
    pca.fit(features)

    # plot it
    reduced_features = pca.transform(features)
    colors = []
    counts = collections.defaultdict(int)
    cat_max = 500
    idxs = []
    # color each point by its first active target, capping each class at cat_max
    for i, t in enumerate(targets):
        c = None
        if t[0] > 0. and counts[0] < cat_max:
            c = 'blue'
            counts[0] += 1
        elif t[1] > 0. and counts[1] < cat_max:
            c = 'red'
            counts[1] += 1
        elif t[2] > 0. and counts[2] < cat_max:
            c = 'purple'
            counts[2] += 1
        elif t[3] > 0. and counts[3] < cat_max:
            c = 'orange'
            counts[3] += 1
        elif t[4] > 0. and counts[4] < cat_max:
            c = 'green'
            counts[4] += 1
        if c is not None:
            colors.append(c)
            idxs.append(i)
    plt.figure(figsize=(10, 10))
    plt.scatter(reduced_features[idxs, 0],
                reduced_features[idxs, 1],
                c=colors,
                alpha=.5)
    plt.show()
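The commented-out IncrementalPCA above is the memory-friendly alternative when the feature matrix is too large for a single PCA fit. A minimal sketch, assuming the same (n_samples, n_features) features array:

from sklearn.decomposition import IncrementalPCA

def fit_incremental_pca(features, n_components=2, batch_size=1000):
    ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
    # feed the data in chunks so only one batch is resident at a time
    for start in range(0, len(features), batch_size):
        chunk = features[start:start + batch_size]
        # partial_fit needs at least n_components samples per chunk
        if len(chunk) >= n_components:
            ipca.partial_fit(chunk)
    return ipca.transform(features)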
Example #5
    def test_normalization(self):
        input_filepath = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'data', 'datasets',
                         'debug.h5'))
        data_unnorm = dataset_loaders.risk_dataset_loader(input_filepath,
                                                          normalize=False)
        data_norm = dataset_loaders.risk_dataset_loader(input_filepath,
                                                        normalize=True)

        mean = np.mean(data_unnorm['x_train'], axis=0)
        expected = (data_unnorm['x_train'] - mean)
        std = np.std(expected, axis=0)
        std[std < 1e-8] = 1
        expected /= std

        actual = data_norm['x_train']
        np.testing.assert_array_equal(expected, actual)
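The test above pins down the normalization the loader is expected to apply: zero-mean, unit-variance scaling with a guard for near-constant features. A standalone sketch of that transform follows; whether the loader also reuses the training statistics for the validation split is an assumption, not something this test checks.

import numpy as np

def normalize_features(x, eps=1e-8):
    mean = np.mean(x, axis=0)
    std = np.std(x - mean, axis=0)
    std[std < eps] = 1.  # leave near-constant features unscaled
    return (x - mean) / std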
Example #6
def main(argv=None):
    # use the flags from regular compression
    FLAGS = compression.run_compression.FLAGS

    # set random seeds
    np.random.seed(FLAGS.random_seed)
    tf.set_random_seed(FLAGS.random_seed)

    # set up training constants
    basedir = os.path.split(FLAGS.dataset_filepath)[0]
    if not os.path.exists(basedir):
        os.mkdir(basedir)
    dataset_filepath_template = os.path.join(basedir, 'iter_{}.h5')
    basedir = os.path.split(FLAGS.julia_weights_filepath)[0]
    if not os.path.exists(basedir):
        os.mkdir(basedir)
    network_filepath_template = os.path.join(basedir, 'iter_{}.weights')

    # the dataset collector needs to know the seed at which to begin
    # collection; convey this by tracking the value in the flags
    FLAGS.initial_seed = 1

    # create the model then repeatedly collect and fit datasets
    with tf.Session() as session:

        # build the network to use throughout training
        network = ffnn.FeedForwardNeuralNetwork(session, FLAGS)

        # None signifies the initial, non-bootstrap dataset collection
        network_filepath = FLAGS.initial_network_filepath

        for bootstrap_iter in range(FLAGS.bootstrap_iterations):

            # generate a dataset
            dataset_filepath = dataset_filepath_template.format(bootstrap_iter)
            generate_dataset(FLAGS, dataset_filepath, network_filepath)

            # load in the dataset
            data = dataset_loaders.risk_dataset_loader(
                dataset_filepath,
                normalize=True,
                shuffle=True,
                train_split=.9,
                debug_size=FLAGS.debug_size)
            d = dataset.Dataset(data, FLAGS)

            # fit the network to the dataset
            network.fit(d)

            # save weights to a julia-compatible weight file for next iteration
            network_filepath = network_filepath_template.format(bootstrap_iter)
            neural_networks.utils.save_trainable_variables(
                network_filepath, session, data)

            # increment the initial seed by the number of scenarios simulated
            # during each collection iteration
            FLAGS.initial_seed += FLAGS.num_scenarios
Example #7
def main(argv=None):
    # custom parse of flags for list input
    prediction_flags.custom_parse_flags(FLAGS)

    # set random seeds
    np.random.seed(FLAGS.random_seed)
    tf.set_random_seed(FLAGS.random_seed)

    # load dataset
    input_filepath = FLAGS.dataset_filepath
    data = dataset_loaders.risk_dataset_loader(
        input_filepath,
        shuffle=FLAGS.shuffle_data,
        train_split=FLAGS.train_split,
        debug_size=FLAGS.debug_size,
        timesteps=FLAGS.timesteps,
        num_target_bins=FLAGS.num_target_bins,
        balanced_class_loss=FLAGS.balanced_class_loss,
        target_index=FLAGS.target_index,
        load_likelihood_weights=FLAGS.use_likelihood_weights)

    # infer what the input dimension should be from the data
    FLAGS.input_dim = prediction_utils.infer_input_dim(data)
    FLAGS.output_dim = prediction_utils.infer_output_dim(data)

    if FLAGS.balanced_class_loss or FLAGS.use_likelihood_weights:
        d = dataset.WeightedDataset(data, FLAGS)
    else:
        d = dataset.Dataset(data, FLAGS)

    print('training set size: {}'.format(len(data['x_train'])))
    print('means:\n{}\n{}'.format(np.mean(d.data['y_train'], axis=0),
                                  np.mean(d.data['y_val'], axis=0)))
    y = copy.deepcopy(d.data['y_val'])
    y[y == 0.] = 1e-8
    y[y == 1.] = 1 - 1e-8
    prediction_metrics.regression_score(y, np.mean(y, axis=0), 'baseline')
    prediction_metrics.regression_score(y, y, 'correct')

    # fit the model
    with tf.Session(config=tf.ConfigProto(
            log_device_placement=False)) as session:
        # split based on the task being performed
        if FLAGS.task_type == 'classification':
            network = nnp.NeuralNetworkClassifier(session, FLAGS)
        else:
            network = nnp.NeuralNetworkPredictor(session, FLAGS)

        network.fit(d)

        # save weights to a julia-compatible weight file
        neural_networks.utils.save_trainable_variables(
            FLAGS.julia_weights_filepath, session, data)

        # evaluate the fit
        prediction_metrics.evaluate_fit(network, data, FLAGS)
Example #8
    def test_risk_dataset_loader(self):
        input_filepath = os.path.abspath(
            os.path.join(os.path.dirname(__file__), 'data', 'datasets',
                         'debug.h5'))
        data = dataset_loaders.risk_dataset_loader(input_filepath,
                                                   train_split=.8)
        keys = ['x_train', 'y_train', 'x_val', 'y_val']
        for k in keys:
            self.assertTrue(k in data)
        num_train = float(len(data['x_train']))
        num_val = len(data['x_val'])
        self.assertAlmostEqual(num_train / (num_train + num_val), .8, 2)
Example #9
def main(argv=None):
    compression_flags.custom_parse_flags(FLAGS)
    np.random.seed(FLAGS.random_seed)
    tf.set_random_seed(FLAGS.random_seed)

    data = dataset_loaders.risk_dataset_loader(
        FLAGS.dataset_filepath,
        shuffle=True,
        train_split=.9,
        debug_size=FLAGS.debug_size,
        timesteps=FLAGS.timesteps,
        num_target_bins=FLAGS.num_target_bins,
        balanced_class_loss=FLAGS.balanced_class_loss,
        target_index=FLAGS.target_index)
    x = data['x_train']
    y = data['y_train']
    eps = 1e-8
    y[y == 0.] = eps
    y[y == 1.] = 1 - eps

    base_dir = '/home/sisl/blake/risk_prediction/data/snapshots'
    snapshot_dir_names = ['mc_{}'.format(c) for c in [1, 2, 4, 8, 16, 32]]
    snapshot_dirs = [
        os.path.join(base_dir, name) for name in snapshot_dir_names
    ]
    print(snapshot_dirs)
    r2s = []
    with tf.Session() as session:
        network = ffnn.FeedForwardNeuralNetwork(session, FLAGS)
        for snapshot_dir in snapshot_dirs:
            FLAGS.snapshot_dir = snapshot_dir
            network.load()
            y_pred = network.predict(x)
            y_pred[y_pred < eps] = eps
            y_pred[y_pred > 1 - eps] = 1 - eps
            ll = np.sum(y * np.log(y_pred)) + np.sum(
                (1 - y) * np.log(1 - y_pred))
            y_null = np.mean(y, axis=0, keepdims=True)
            y_null[y_null < eps] = eps
            y_null[y_null > 1 - eps] = 1 - eps
            ll_null = np.sum(y * np.log(y_null)) + np.sum(
                (1 - y) * np.log(1 - y_null))
            mcfadden_r2 = 1 - ll / ll_null
            mse = np.sum((y_pred - y)**2)
            mean = np.mean(y_pred)
            r2s.append((mcfadden_r2, mse, mean))

    for (r2, snapshot_dir) in zip(r2s, snapshot_dirs):
        print(snapshot_dir)
        print(r2)
        print()
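McFadden's pseudo R^2 computed in the loop above, factored into a small helper for clarity: it compares the Bernoulli log likelihood of the predictions against that of the intercept-only (mean) model. A sketch assuming targets and predictions in [0, 1]:

import numpy as np

def mcfadden_r2(y, y_pred, eps=1e-8):
    y_pred = np.clip(y_pred, eps, 1 - eps)
    y_null = np.clip(np.mean(y, axis=0, keepdims=True), eps, 1 - eps)
    ll = np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
    ll_null = np.sum(y * np.log(y_null) + (1 - y) * np.log(1 - y_null))
    return 1 - ll / ll_null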
Example #10
def main(argv=None):
    # custom parse of flags for list input
    custom_parse_flags(FLAGS)

    # set random seeds
    np.random.seed(FLAGS.random_seed)
    tf.set_random_seed(FLAGS.random_seed)

    # load dataset
    input_filepath = FLAGS.dataset_filepath
    data = dataset_loaders.risk_dataset_loader(
        input_filepath,
        shuffle=True,
        train_split=.9,
        debug_size=FLAGS.debug_size,
        timesteps=FLAGS.timesteps,
        num_target_bins=FLAGS.num_target_bins,
        balanced_class_loss=FLAGS.balanced_class_loss,
        target_index=FLAGS.target_index)

    d = dataset.Dataset(data, FLAGS)

    print(np.mean(d.data['y_train'], axis=0))
    print(np.mean(d.data['y_val'], axis=0))
    y = copy.deepcopy(d.data['y_val'])
    y[y == 0.] = 1e-8
    y[y == 1.] = 1 - 1e-8
    baseline = np.mean(y, axis=0)
    ce = -np.sum(y * np.log(baseline)) - np.sum(
        (1 - y) * np.log(1 - baseline))
    mse = np.sum((y - baseline)**2)
    r2 = 1 - ((y - baseline)**2).sum() / ((y - y.mean(axis=0))**2).sum()
    num_samples = len(y)
    print("cross entropy from outputting validation mean: {}".format(
        ce / num_samples))
    print("mse from outputting validation mean: {}".format(mse / num_samples))
    print("r2 from outputting validation mean: {}".format(r2))

    ce = -np.sum(y * np.log(y)) - np.sum((1 - y) * np.log(1 - y))
    print("cross entropy from outputting correct values: {}".format(
        ce / num_samples))
    try:
        ce = -np.sum(y[:, 3] * np.log(y[:, 3])) - np.sum(
            (1 - y[:, 3]) * np.log(1 - y[:, 3]))
        print("hard brake cross entropy from outputting correct values: {}".
              format(ce / num_samples))
    except IndexError:
        # y may not have a hard brake column (index 3)
        pass
    # fit the model
    with tf.Session(config=tf.ConfigProto(
            log_device_placement=False)) as session:

        network = ffnn.RiskFeatureNeuralNetwork(session, FLAGS)
        network.fit(d)

        # y_idxs = np.where(np.sum(data['y_val'][:10000], axis=1) > 1e-4)[0]
        # y_idxs = np.random.permutation(y_idxs)[:10]
        # y_pred = network.predict(data['x_val'][y_idxs])

        # # final train loss
        # y_pred = network.predict(data['x_train'])
        # y = data['y_train']
        # y_null = np.mean(y, axis=0)
        # classification_score(y, y_pred, 'train', y_null=y_null)

        # # final validation loss
        # y_pred = network.predict(data['x_val'])
        # y = data['y_val']
        # y_null = np.mean(y, axis=0)
        # classification_score(y, y_pred, 'val', y_null=y_null)

        cluster(data, network, FLAGS)
        default='all')
    parser.add_argument('-f', dest='dataset_filepath', 
        default='../../data/datasets/may/ngsim_5_sec.h5')
    args = parser.parse_args()
    return args

if __name__ == '__main__':
    # in case things get a bit crazy
    np.random.seed(1)

    # parse inputs
    opts = parse_args()

    # load the dataset
    data = dataset_loaders.risk_dataset_loader(
        opts.dataset_filepath, shuffle=True, train_split=.9, 
        debug_size=None, timesteps=1, num_target_bins=2)

    # build the model
    if len(data['y_train'].shape) > 1:
        _, num_targets = data['y_train'].shape
    else:
        num_targets = 1
    if opts.model_type == 'all':
        models = [build_model(mt, num_targets) for mt in MODEL_TYPES]
    else:
        model = build_model(opts.model_type, num_targets)



    # x, y = data['x_train'], data['y_train']
    # input_filepaths = [os.path.join(basedir, f) for f in input_filenames]
    # dataset_labels = ['{}_seconds'.format(i) for i in range(1, num_iters + 1)]
    
    # check for / create output directory
    base_directory = '../../data/visualizations/'
    output_directory = os.path.join(
        base_directory, os.path.split(
            input_filepaths[0])[-1]).replace('.h5', '')
    if not os.path.exists(output_directory):
        os.mkdir(output_directory)

    # load in each dataset
    load_likelihood_weights = False
    debug_size = 500000
    datasets = [dataset_loaders.risk_dataset_loader(
        input_filepath, shuffle=False, train_split=1., 
        debug_size=debug_size, normalize=False, timesteps=1,
        load_likelihood_weights=load_likelihood_weights) for 
        input_filepath in input_filepaths]

    feature_labels = h5py.File(input_filepaths[-1], 'r')['risk'].attrs['feature_names']

    # display basic info about the targets
    display_target_info(datasets, target_labels, dataset_labels, output_directory)

    # # histogram the features
    # visualize_features(datasets[-1], feature_labels, dataset_labels[-1],
    #     output_directory)

    # # compare histogram of features across datasets
    # compare_feature_histograms(datasets, feature_labels, dataset_labels,
    #     output_directory)
Example #13
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # in case things get a bit crazy
    np.random.seed(1)

    # parse inputs
    opts = parse_args()

    # load the dataset
    data = dataset_loaders.risk_dataset_loader(opts.dataset_filepath,
                                               shuffle=True,
                                               train_split=.8,
                                               debug_size=None,
                                               timesteps=1,
                                               num_target_bins=2,
                                               target_index=opts.target_index,
                                               load_likelihood_weights=True)

    # build the model
    if len(data['y_train'].shape) > 1:
        _, num_targets = data['y_train'].shape
    else:
        num_targets = 1

    if opts.model_type == 'all':
        models = [build_model(mt, num_targets) for mt in MODEL_TYPES]
        names = [mt for mt in MODEL_TYPES]
    else:
        model = build_model(opts.model_type, num_targets)
def main(argv=None):
    # custom parse of flags for list input
    compression.compression_flags.custom_parse_flags(FLAGS)

    # set random seeds
    np.random.seed(FLAGS.random_seed)
    tf.set_random_seed(FLAGS.random_seed)

    # load dataset
    input_filepath = FLAGS.dataset_filepath
    data = dataset_loaders.risk_dataset_loader(input_filepath,
                                               shuffle=True,
                                               train_split=.8,
                                               debug_size=FLAGS.debug_size,
                                               normalize=True,
                                               timesteps=1)

    # set training sizes
    num_runs = 12
    train_scores = []
    val_scores = []
    sizes = [
        int(v) for v in np.logspace(
            np.log2(1000), np.log2(len(data['x_train'])), num_runs, base=2.0)
    ]
    print(sizes)
    eps = 1e-12

    # for each size, fit for a set number of epochs and then compute loss
    for i in range(num_runs):
        # train for longer with more data
        FLAGS.num_epochs += 10

        # create run-specific dataset
        cur_data = {
            'x_train': data['x_train'][:sizes[i]],
            'y_train': data['y_train'][:sizes[i]],
            'x_val': data['x_val'],
            'y_val': data['y_val']
        }
        d = dataset.Dataset(cur_data, FLAGS)

        with tf.Session() as session:
            network = ffnn.FeedForwardNeuralNetwork(session, FLAGS)
            network.fit(d)

            # final train loss
            y_pred = network.predict(cur_data['x_train']).astype(np.float128)
            y_pred[y_pred < eps] = eps
            y_pred[y_pred > (1 - eps)] = 1 - eps
            y = cur_data['y_train']
            ce = (-np.sum(y * np.log(y_pred)) - np.sum(
                (1 - y) * np.log(1 - y_pred))) / len(y)
            mse = np.mean((y - y_pred)**2)
            train_scores.append((ce, mse))

            # final validation loss
            y_pred = network.predict(cur_data['x_val']).astype(np.float128)
            y_pred[y_pred < eps] = eps
            y_pred[y_pred > (1 - eps)] = 1 - eps
            y = cur_data['y_val']
            ce = (-np.sum(y * np.log(y_pred)) - np.sum(
                (1 - y) * np.log(1 - y_pred))) / len(y)
            mse = np.mean((y - y_pred)**2)
            np.savez('../../data/scratch.npz', y_pred=y_pred)
            val_scores.append((ce, mse))

            print('size: {}\ttrain: {}\tval: {}'.format(
                sizes[i], train_scores[i], val_scores[i]))

        # reset graph after each run
        tf.reset_default_graph()
        output_filepath = os.path.join(
            '../../data/learning_curves/',
            os.path.split(input_filepath)[-1].replace('.h5', '.npz'))
        np.savez(output_filepath,
                 sizes=sizes,
                 train_scores=train_scores,
                 val_scores=val_scores)
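The size schedule above is a geometric progression from 1000 up to the full training-set size; np.geomspace expresses the same thing more directly. A sketch with a hypothetical training-set size of 100000:

import numpy as np

# 12 sizes spaced evenly on a log scale, equivalent to the logspace/log2 form above
sizes = [int(v) for v in np.geomspace(1000, 100000, 12)]
print(sizes)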
Example #15
def main(argv=None):
    # parse the hidden layer dims
    custom_parse_flags(FLAGS)

    # load the data
    data = dataset_loaders.risk_dataset_loader(FLAGS.dataset_filepath,
                                               shuffle=False,
                                               train_split=.9,
                                               debug_size=FLAGS.debug_size,
                                               timesteps=1)

    # only select the indices with nonzero targets
    # idxs = np.where(np.sum(data['y_train'], axis=1) > 0)[0]
    idxs = np.where(data['y_train'][:, 0] > 0.)[0]
    # idxs = np.array(list(idxs) + list(range(500)))
    data['x_train'] = data['x_train'][idxs]
    data['y_train'] = data['y_train'][idxs]
    # idxs = np.where(np.sum(data['y_val'], axis=1) > 0)[0]
    idxs = np.where(data['y_val'][:, 0] > 0.)[0]
    # idxs = np.array(list(idxs) + list(range(500)))
    data['x_val'] = data['x_val'][idxs]
    data['y_val'] = data['y_val'][idxs]

    # subselect lidar features
    data['x_train'] = data['x_train'][:, -40:]
    data['x_val'] = data['x_val'][:, -40:]

    print(len(data['x_train']))

    with tf.Session() as session:
        # build the autoencoder
        input_ph, dropout_ph, encode, decode, loss, train_op = AE(
            FLAGS.input_dim, FLAGS.encode_dims, FLAGS.decode_dims,
            FLAGS.learning_rate)
        session.run(tf.global_variables_initializer())

        saver = tf.train.Saver(max_to_keep=10)
        if not os.path.exists(FLAGS.snapshot_dir):
            os.mkdir(FLAGS.snapshot_dir)
        if FLAGS.load_network:
            filepath = tf.train.latest_checkpoint(FLAGS.snapshot_dir)
            if filepath is not None:
                saver.restore(session, filepath)

        # train it
        num_train_batches = int(len(data['x_train']) / FLAGS.batch_size)
        if (num_train_batches * FLAGS.batch_size) < len(data['x_train']):
            num_train_batches += 1
        num_val_batches = int(len(data['x_val']) / FLAGS.batch_size)
        if num_val_batches * FLAGS.batch_size < len(data['x_val']):
            num_val_batches += 1
        start_time = time.time()
        for epoch in range(FLAGS.num_epochs):

            # train
            train_losses = []
            for bidx in range(num_train_batches):
                s = bidx * FLAGS.batch_size
                e = s + FLAGS.batch_size
                x = data['x_train'][s:e]
                feed_dict = {input_ph: x, dropout_ph: FLAGS.dropout_keep_prob}
                output_list = [loss, train_op]
                num_loss, _ = session.run(output_list, feed_dict=feed_dict)
                train_losses.append(num_loss / len(x))

            # val
            val_losses = []
            for bidx in range(num_val_batches):
                s = bidx * FLAGS.batch_size
                e = s + FLAGS.batch_size
                x = data['x_val'][s:e]
                feed_dict = {input_ph: x, dropout_ph: 1.}
                num_loss = session.run(loss, feed_dict=feed_dict)
                val_losses.append(num_loss / len(x))

            if (epoch + 1) % FLAGS.save_weights_every == 0:
                filepath = os.path.join(FLAGS.snapshot_dir, 'weights')
                saver.save(session, filepath, global_step=epoch)

            # report
            print('epoch: {}\ttrain: {}\tval: {}\ttime: {}'.format(
                epoch, np.mean(train_losses), np.mean(val_losses),
                time.time() - start_time))

        plot_features(data, session, input_ph, dropout_ph, encode, FLAGS)
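A minimal sketch of the AE() builder assumed above, matching what the training loop unpacks (input placeholder, dropout keep-prob placeholder, encode/decode tensors, a summed reconstruction loss, and a train op). This is TF1-style graph code written to be consistent with that loop, not the project's actual builder.

import tensorflow as tf

def AE(input_dim, encode_dims, decode_dims, learning_rate):
    input_ph = tf.placeholder(tf.float32, shape=(None, input_dim), name='inputs')
    dropout_ph = tf.placeholder(tf.float32, shape=(), name='dropout_keep_prob')
    hidden = input_ph
    # encoder: dense layers with dropout applied to the activations
    for dim in encode_dims:
        hidden = tf.layers.dense(hidden, dim, activation=tf.nn.relu)
        hidden = tf.nn.dropout(hidden, keep_prob=dropout_ph)
    encode = hidden
    # decoder: dense layers back up to a linear reconstruction of the input
    for dim in decode_dims:
        hidden = tf.layers.dense(hidden, dim, activation=tf.nn.relu)
    decode = tf.layers.dense(hidden, input_dim, activation=None)
    # summed squared error; the loop above divides the returned value by the batch size
    loss = tf.reduce_sum(tf.square(decode - input_ph))
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return input_ph, dropout_ph, encode, decode, loss, train_op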
Example #16
    parser.add_argument('-m', dest='model_type', 
        default='all')
    parser.add_argument('-f', dest='dataset_filepath', 
        default='../../data/datasets/1_1/risk_26.h5')
    args = parser.parse_args()
    return args

if __name__ == '__main__':
    # in case things get a bit crazy
    np.random.seed(1)

    # parse inputs
    opts = parse_args()

    # load the dataset
    data = dataset_loaders.risk_dataset_loader(opts.dataset_filepath, 
        normalize=True, debug_size=None, train_split=.9, shuffle=True)

    # build the model
    if len(data['y_train'].shape) > 1:
        _, num_targets = data['y_train'].shape
    else:
        num_targets = 1
    if opts.model_type == 'all':
        models = [build_model(mt, num_targets) for mt in MODEL_TYPES]
    else:
        model = build_model(opts.model_type, num_targets)



    # x, y = data['x_train'], data['y_train']
    # idxs_train = get_nonzero_idxs(y)
Example #17
    # check for / create output directory
    base_directory = '../../data/visualizations/'
    output_directory = os.path.join(
        base_directory,
        os.path.split(input_filepaths[0])[-1]).replace('.h5', '')
    # output_directory = os.path.join(base_directory, 'risk_beh')
    if not os.path.exists(output_directory):
        os.mkdir(output_directory)

    # load in each dataset
    debug_size = 5000000
    datasets = [
        dataset_loaders.risk_dataset_loader(input_filepath,
                                            shuffle=False,
                                            train_split=1.,
                                            debug_size=debug_size,
                                            normalize=False,
                                            timesteps=1)
        for input_filepath in input_filepaths
    ]

    # display basic info about the targets
    display_target_info(datasets, target_labels, dataset_labels,
                        output_directory)

    # ## analyze behavior
    # for i, dataset in enumerate(datasets):
    #     print(dataset_labels[i])
    #     collision_probability_by_behavior(dataset)

    # compute feature target correlations and write to file
def main(argv=None):
    # custom parse of flags for list input
    custom_parse_flags(FLAGS)

    # set random seeds
    np.random.seed(FLAGS.random_seed)
    tf.set_random_seed(FLAGS.random_seed)

    # load dataset
    input_filepath = FLAGS.dataset_filepath
    data = dataset_loaders.risk_dataset_loader(
        input_filepath,
        shuffle=True,
        train_split=.9,
        debug_size=FLAGS.debug_size,
        timesteps=FLAGS.timesteps,
        num_target_bins=FLAGS.num_target_bins,
        balanced_class_loss=FLAGS.balanced_class_loss,
        target_index=FLAGS.target_index)

    if FLAGS.use_priority:
        d = priority_dataset.PrioritizedDataset(data, FLAGS)
    else:
        if FLAGS.balanced_class_loss:
            d = dataset.WeightedDataset(data, FLAGS)
        else:
            d = dataset.Dataset(data, FLAGS)

    print(np.mean(d.data['y_train'], axis=0))
    print(np.mean(d.data['y_val'], axis=0))
    y = copy.deepcopy(d.data['y_val'])
    y[y == 0.] = 1e-8
    y[y == 1.] = 1 - 1e-8
    baseline = np.mean(y, axis=0)
    ce = -np.sum(y * np.log(baseline)) - np.sum(
        (1 - y) * np.log(1 - baseline))
    mse = np.sum((y - baseline)**2)
    r2 = 1 - ((y - baseline)**2).sum() / ((y - y.mean(axis=0))**2).sum()
    num_samples = len(y)
    print("cross entropy from outputting validation mean: {}".format(
        ce / num_samples))
    print("mse from outputting validation mean: {}".format(mse / num_samples))
    print("r2 from outputting validation mean: {}".format(r2))

    ce = -np.sum(y * np.log(y)) - np.sum((1 - y) * np.log(1 - y))
    print("cross entropy from outputting correct values: {}".format(
        ce / num_samples))
    try:
        ce = -np.sum(y[:, 3] * np.log(y[:, 3])) - np.sum(
            (1 - y[:, 3]) * np.log(1 - y[:, 3]))
        print("hard brake cross entropy from outputting correct values: {}".
              format(ce / num_samples))
    except IndexError:
        # y may not have a hard brake column (index 3)
        pass
    # fit the model
    with tf.Session(config=tf.ConfigProto(
            log_device_placement=False)) as session:
        # if the timestep dimension is > 1, use recurrent network
        if FLAGS.timesteps > 1:
            network = rnn.RecurrentNeuralNetwork(session, FLAGS)
        else:
            if FLAGS.task_type == 'classification':
                if FLAGS.balanced_class_loss:
                    network = ffnn.WeightedClassificationFeedForwardNeuralNetwork(
                        session, FLAGS)
                else:
                    network = ffnn.ClassificationFeedForwardNeuralNetwork(
                        session, FLAGS)
            else:
                network = ffnn.FeedForwardNeuralNetwork(session, FLAGS)
        network.fit(d)

        # save weights to a julia-compatible weight file
        neural_networks.utils.save_trainable_variables(
            FLAGS.julia_weights_filepath, session, data)

        y_idxs = np.where(np.sum(data['y_val'][:10000], axis=1) > 1e-4)[0]
        y_idxs = np.random.permutation(y_idxs)[:10]
        y_pred = network.predict(data['x_val'][y_idxs])

        for y_pred_s, y_s in zip(y_pred, data['y_val'][y_idxs]):
            print(y_pred_s)
            print(y_s)
            print()

        # determine the function used for assessment based on the task
        score = (regression_score
                 if FLAGS.task_type == 'regression' else classification_score)

        # final train loss
        y_pred = network.predict(data['x_train'])
        y = data['y_train']
        y_null = np.mean(y, axis=0)
        score(y, y_pred, 'train', y_null=y_null)

        # final validation loss
        y_pred = network.predict(data['x_val'])
        y = data['y_val']
        y_null = np.mean(y, axis=0)
        score(y, y_pred, 'val', y_null=y_null)

        # only makes sense to run this with regression
        if FLAGS.task_type == 'regression':
            # final validation loss, hard braking
            y_pred = network.predict(data['x_val'])
            y_pred = y_pred[:, 3]
            y = data['y_val'][:, 3]
            y_null = np.mean(y, axis=0)
            score(y, y_pred, 'hard brake', y_null=y_null)

        # score again on the unshuffled data
        if FLAGS.task_type == 'regression':
            data = dataset_loaders.risk_dataset_loader(
                input_filepath,
                shuffle=False,
                train_split=1.,
                debug_size=FLAGS.debug_size)
            unnorm_data = dataset_loaders.risk_dataset_loader(
                input_filepath,
                shuffle=False,
                train_split=1.,
                debug_size=FLAGS.debug_size,
                normalize=False)

            y_pred = network.predict(data['x_train'])
            y = data['y_train']
            score(y, y_pred, 'unshuffled', data, unnorm_data)

        # save weights to a julia-compatible weight file
        neural_networks.utils.save_trainable_variables(
            FLAGS.julia_weights_filepath, session, data)