Example #1
import numpy as np
import pandas as pd

# df_to_padded, padded_to_df, and generate_random_df come from the
# surrounding wtte-rnn test module.

def test_df_to_padded_padded_to_df():
    """Test the df_to_padded / padded_to_df round trip."""

    # Cases to cover: call with names / call with order?
    # Continuous tte? Contiguous t?
    np.random.seed(1)
    n_seqs = 100
    max_seq_length = 100
    ids = range(n_seqs)

    df = generate_random_df(n_seqs, max_seq_length)
    df = df.reset_index(drop=True)

    # Column names to transform to tensor
    column_names = ['event', 'int_column', 'double_column']
    dtypes = df[column_names].dtypes.values
    padded = df_to_padded(df, column_names)

    df_new = padded_to_df(padded, column_names, dtypes, ids=ids)

    column_names = ['id', 't', 'event', 'int_column', 'double_column']

    # Slicing changes the index, so reset it before comparing.
    df = df[column_names].reset_index(drop=True)
    pd.testing.assert_frame_equal(df[column_names], df_new)
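For intuition, the round trip under test can be sketched with plain pandas: pivot the long-format rows onto an (id, t) grid where missing steps become NaN padding, then stack the grid back and let the padding drop out. This is a conceptual re-implementation with a made-up dataframe, not wtte-rnn's actual code.

import pandas as pd

df = pd.DataFrame({'id': [0, 0, 1],
                   't': [0, 2, 0],
                   'event': [1, 0, 1]})

# "df_to_padded": pivot each feature onto an (id, t) grid; absent steps pad as NaN.
grid = df.pivot(index='id', columns='t', values='event')
padded = grid.to_numpy()[:, :, None]  # add a feature axis -> (n_seqs, n_timesteps, 1)

# "padded_to_df": stack back to long format; the NaN padding rows drop out.
df_back = grid.stack().rename('event').reset_index()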
Example #2
import numpy as np
import pandas as pd

def run_test(
        # Trailing '1' is a quick way to avoid polluting the test namespace.
        id_col1,
        abs_time_col1,
        discrete_time1,
        pad_between_steps1):
    np.random.seed(1)

    # Would fail intermittently with unique_times=False, since duplicate
    # timestamps get collapsed.
    df = generate_random_df(n_seqs=5, max_seq_length=10, unique_times=True)

    # Rename the id and time columns to flush out hard-coded column-name assumptions.
    df.rename(columns={"dt": abs_time_col1, 'id': id_col1}, inplace=True)

    column_names1 = ['event', 'int_column', 'double_column']
    padded, padded_t, seq_ids, df_collapsed = \
        data_pipeline(df,
                      id_col=id_col1,
                      abs_time_col=abs_time_col1,
                      column_names=column_names1,
                      discrete_time=discrete_time1,
                      pad_between_steps=pad_between_steps1,
                      infer_seq_endtime=False,
                      time_sec_interval=1,
                      timestep_aggregation_dict=None,
                      drop_last_timestep=False
                      )

    if pad_between_steps1:
        df_new = padded_to_df(padded,
                              column_names1, [int, int, float],
                              ids=seq_ids,
                              id_col=id_col1,
                              t_col='t_elapsed')
        df = df[[id_col1, 't_elapsed'] + column_names1].reset_index(drop=True)
        pd.testing.assert_frame_equal(df, df_new)
    else:
        df_new = padded_to_df(padded,
                              column_names1, [int, int, float],
                              ids=seq_ids,
                              id_col=id_col1,
                              t_col='t_ix')
        df = df[[id_col1, 't_ix'] + column_names1].reset_index(drop=True)

        pd.testing.assert_frame_equal(df, df_new)
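The branch above hinges on the pipeline's two time conventions: 't_elapsed' (time since the sequence start, gaps preserved) versus 't_ix' (the dense within-sequence step index). A rough sketch of how such columns can be derived; the column names mirror the test, but the construction is an assumption, not the library's code.

import pandas as pd

df = pd.DataFrame({'id': [0, 0, 0], 'dt': [10, 12, 15]})

# 't_elapsed': time since each sequence's first observation (gaps preserved).
df['t_elapsed'] = df.groupby('id')['dt'].transform(lambda s: s - s.min())

# 't_ix': dense step index within the sequence (gaps ignored).
df['t_ix'] = df.groupby('id').cumcount()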
Example #3
import numpy as np
import pandas as pd

def df_to_padded_padded_to_df_runner(t_col):
    n_seqs = 5
    max_seq_length = 10
    ids = range(n_seqs)
    cols_to_expand = ['event', 'int_column', 'double_column']
    np.random.seed(1)

    df = generate_random_df(n_seqs, max_seq_length)
    df = df.reset_index(drop=True)

    # Column names to transform to tensor
    dtypes = df[cols_to_expand].dtypes.values
    padded = df_to_padded(df, cols_to_expand, 'id', t_col)

    df_new = padded_to_df(padded, cols_to_expand, dtypes, ids, 'id', t_col)
    # Slicing changes the index, so reset it before comparing.
    df = df[['id', t_col] + cols_to_expand].reset_index(drop=True)
    pd.testing.assert_frame_equal(df, df_new)
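The runner above is presumably invoked once per time convention; a hypothetical driver:

# Hypothetical driver: exercise the round trip under both time conventions.
for t_col in ['t_elapsed', 't_ix']:
    df_to_padded_padded_to_df_runner(t_col)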
Example #4
import numpy as np
import pandas as pd

def test_df_to_padded_padded_to_df():
    """Test the df_to_padded / padded_to_df round trip."""

    # Cases to cover: call with names / call with order?
    # Continuous tte? Contiguous t?
    np.random.seed(1)
    n_seqs = 100
    max_seq_length = 100
    ids = range(n_seqs)
    df = generate_random_df(n_seqs, max_seq_length)

    column_names = ['event', 'int_column', 'double_column']
    dtypes = ['double', 'int', 'float']

    padded = df_to_padded(df, column_names)

    df_new = padded_to_df(padded, column_names, dtypes, ids=ids)

    # NOTE: the trailing [0] means only the first row is actually compared.
    assert False not in (
        df[['id', 't', 'event', 'int_column', 'double_column']].values
        == df_new.values)[0], 'test_df_to_padded_padded_to_df failed'
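Since the assert above only checks the first row, a stricter variant (a sketch, not from the source) would compare the full frames while ignoring the dtype casts:

pd.testing.assert_frame_equal(
    df[['id', 't', 'event', 'int_column', 'double_column']].reset_index(drop=True),
    df_new,
    check_dtype=False)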
Example #5
import numpy as np
import matplotlib.pyplot as plt

# `model`, `history`, `x`, `weightwatcher`, and `tr` are defined earlier
# in the surrounding training script.
plt.plot(history.history['loss'], label='training')
plt.plot(history.history['val_loss'], label='validation')
plt.legend()
plt.show()
weightwatcher.plot()

## Predict
predicted = model.predict(x)
# Lazy way to copy alpha's NaN mask onto beta: adding 0 * alpha propagates NaNs.
predicted[:, :, 1] = predicted[:, :, 1] + predicted[:, :, 0] * 0
print(predicted.shape)
print('mean alpha pred', np.nanmean(predicted[:, :, 0]))
print('mean beta pred', np.nanmean(predicted[:, :, 1]))
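The two output channels are the Weibull scale (alpha) and shape (beta), so a point estimate of the time to event can be read off as the distribution mean, E[T] = alpha * Gamma(1 + 1/beta). A short sketch, assuming scipy is available; this is not part of the original script.

from scipy.special import gamma

# Mean of Weibull(scale=alpha, shape=beta); NaN-padded timesteps stay NaN.
expected_tte = predicted[:, :, 0] * gamma(1.0 + 1.0 / predicted[:, :, 1])
print('mean predicted tte', np.nanmean(expected_tte))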

# In production you would stop here: transform back to a dataframe and pipe
# it to a database.
tr.padded_to_df(predicted, column_names=["alpha", "beta"], dtypes=[float, float])

## Scatter
# Pick some random sequences
np.random.seed(12)  # seeds 9, 6, 5, 4 also work
random_selection = np.random.choice(predicted.shape[0], min([5, predicted.shape[0]]))
random_selection = np.sort(random_selection)

# Alpha and beta projections
alpha_flat = predicted[:, :, 0][~np.isnan(predicted[:, :, 0])].flatten()
beta_flat = predicted[:, :, 1][~np.isnan(predicted[:, :, 0])].flatten()

## log-alpha typically makes more sense.

for batch_indx in random_selection:
    from matplotlib.colors import LogNorm