import numpy as np
import pandas as pd

# Assumed imports: the helpers under test come from the wtte package.
from wtte.transforms import df_to_padded, padded_to_df
from wtte.pipelines import data_pipeline


def test_df_to_padded_padded_to_df():
    """Tests the df_to_padded / padded_to_df round-trip.
    """
    # Call with names? Call with order?
    # Continuous tte?
    # Contiguous t?
    # np.random.seed(1)
    n_seqs = 100
    max_seq_length = 100
    ids = range(n_seqs)

    df = generate_random_df(n_seqs, max_seq_length)
    df = df.reset_index(drop=True)

    # Column names to transform to tensor.
    column_names = ['event', 'int_column', 'double_column']
    dtypes = df[column_names].dtypes.values

    padded = df_to_padded(df, column_names)
    df_new = padded_to_df(padded, column_names, dtypes, ids=ids)

    # Slicing changes the index, so reset it before comparing.
    column_names = ['id', 't', 'event', 'int_column', 'double_column']
    df = df[column_names].reset_index(drop=True)

    pd.testing.assert_frame_equal(df, df_new)
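# NOTE: `generate_random_df` is assumed but not defined in this excerpt. Below
# is a minimal hypothetical sketch, with column names inferred from how the
# tests use it; the project's real helper may differ.
def generate_random_df(n_seqs, max_seq_length, unique_times=True):
    """Toy long-format data: one row per observed (id, timestep)."""
    frames = []
    for seq_id in range(n_seqs):
        n_steps = np.random.randint(2, max_seq_length + 1)
        # Elapsed time since sequence start; unique per sequence by default.
        t_elapsed = np.sort(np.random.choice(
            max_seq_length, n_steps, replace=not unique_times))
        frames.append(pd.DataFrame({
            'id': seq_id,
            't': t_elapsed,                    # generic time column
            't_ix': np.arange(n_steps),        # contiguous 0..n-1 step index
            't_elapsed': t_elapsed,
            'dt': t_elapsed + 10000 * seq_id,  # fake absolute timestamp
            'event': np.random.randint(0, 2, n_steps),
            'int_column': np.random.randint(0, 10, n_steps),
            'double_column': np.random.uniform(size=n_steps),
        }))
    return pd.concat(frames).reset_index(drop=True)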
def run_test(  # Trailing '1' on the names: a quick way to avoid polluting the testing namespace.
        id_col1,
        abs_time_col1,
        discrete_time1,
        pad_between_steps1):
    np.random.seed(1)
    # Should fail randomly if unique_times=False, since the pipeline
    # aggregates duplicate timesteps away.
    df = generate_random_df(n_seqs=5, max_seq_length=10, unique_times=True)

    # Rename the id and abs-time columns to something new to spot
    # hardcoded column-name assumptions.
    df.rename(columns={'dt': abs_time_col1, 'id': id_col1}, inplace=True)

    column_names1 = ['event', 'int_column', 'double_column']

    padded, padded_t, seq_ids, df_collapsed = \
        data_pipeline(df,
                      id_col=id_col1,
                      abs_time_col=abs_time_col1,
                      column_names=column_names1,
                      discrete_time=discrete_time1,
                      pad_between_steps=pad_between_steps1,
                      infer_seq_endtime=False,
                      time_sec_interval=1,
                      timestep_aggregation_dict=None,
                      drop_last_timestep=False)

    if pad_between_steps1:
        df_new = padded_to_df(padded, column_names1, [int, int, float],
                              ids=seq_ids, id_col=id_col1, t_col='t_elapsed')
        df = df[[id_col1, 't_elapsed'] + column_names1].reset_index(drop=True)
    else:
        df_new = padded_to_df(padded, column_names1, [int, int, float],
                              ids=seq_ids, id_col=id_col1, t_col='t_ix')
        df = df[[id_col1, 't_ix'] + column_names1].reset_index(drop=True)

    pd.testing.assert_frame_equal(df, df_new)
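# Hypothetical drivers showing how run_test might be invoked; the renamed
# columns ('user_id', 'time_stamp') are deliberate, to catch hardcoded
# column-name assumptions in the pipeline. These drivers are a sketch, not
# part of the original source.
def test_pipeline_discrete_padded():
    run_test(id_col1='user_id', abs_time_col1='time_stamp',
             discrete_time1=True, pad_between_steps1=True)


def test_pipeline_continuous_unpadded():
    run_test(id_col1='user_id', abs_time_col1='time_stamp',
             discrete_time1=False, pad_between_steps1=False)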
def df_to_padded_padded_to_df_runner(t_col):
    n_seqs = 5
    max_seq_length = 10
    ids = range(n_seqs)
    cols_to_expand = ['event', 'int_column', 'double_column']
    np.random.seed(1)

    df = generate_random_df(n_seqs, max_seq_length)
    df = df.reset_index(drop=True)

    # Column names to transform to tensor.
    dtypes = df[cols_to_expand].dtypes.values
    padded = df_to_padded(df, cols_to_expand, 'id', t_col)
    df_new = padded_to_df(padded, cols_to_expand, dtypes, ids, 'id', t_col)

    # Slicing changes the index, so reset it before comparing.
    df = df[['id', t_col] + cols_to_expand].reset_index(drop=True)
    pd.testing.assert_frame_equal(df, df_new)
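# Presumable invocations of the runner, one per supported time column
# (a hypothetical sketch; 't_ix' and 't_elapsed' are the names used in the
# pipeline test above):
def test_df_to_padded_padded_to_df_t_ix():
    df_to_padded_padded_to_df_runner(t_col='t_ix')


def test_df_to_padded_padded_to_df_t_elapsed():
    df_to_padded_padded_to_df_runner(t_col='t_elapsed')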
# Variant of the round-trip test above, passing explicit dtypes and comparing
# raw values. Renamed so it does not shadow the earlier test under pytest.
def test_df_to_padded_padded_to_df_explicit_dtypes():
    """Tests df_to_padded, padded_to_df with explicit dtypes.
    """
    n_seqs = 100
    max_seq_length = 100
    ids = range(n_seqs)
    df = generate_random_df(n_seqs, max_seq_length)

    column_names = ['event', 'int_column', 'double_column']
    dtypes = ['double', 'int', 'float']

    padded = df_to_padded(df, column_names)
    df_new = padded_to_df(padded, column_names, dtypes, ids=ids)

    assert (df[['id', 't', 'event', 'int_column', 'double_column']].values ==
            df_new.values).all(), \
        'test_df_to_padded_padded_to_df_explicit_dtypes failed'
plt.plot(history.history['loss'], label='training')
plt.plot(history.history['val_loss'], label='validation')
plt.legend()
plt.show()

weightwatcher.plot()

## Predict
predicted = model.predict(x)
# Lazy re-add of the NaN-mask: alpha is NaN wherever the input was masked,
# so adding alpha * 0 propagates those NaNs into beta.
predicted[:, :, 1] = predicted[:, :, 1] + predicted[:, :, 0] * 0
print(predicted.shape)

print('mean alpha pred', np.nanmean(predicted[:, :, 0]))
print('mean beta pred', np.nanmean(predicted[:, :, 1]))

# In production you'd stop here: transform to a dataframe and pipe it back
# to some database.
tr.padded_to_df(predicted, column_names=["alpha", "beta"], dtypes=[float, float])

## Scatter
# Pick some random sequences.
np.random.seed(12)  # 9, 6, 5, 4 ok
random_selection = np.random.choice(predicted.shape[0],
                                    min([5, predicted.shape[0]]))
random_selection = np.sort(random_selection)

# Alpha and beta projections, masked to observed timesteps.
alpha_flat = predicted[:, :, 0][~np.isnan(predicted[:, :, 0])].flatten()
beta_flat = predicted[:, :, 1][~np.isnan(predicted[:, :, 0])].flatten()

from matplotlib.colors import LogNorm

# log-alpha typically makes more sense.
for batch_indx in random_selection:
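    # Hypothetical continuation: the original loop body is truncated here.
    # One plausible per-sequence view is a log-density of the predicted
    # (log-alpha, beta) trajectory; all names below are from this sketch.
    a = predicted[batch_indx, :, 0]
    b = predicted[batch_indx, :, 1]
    keep = ~np.isnan(a)
    plt.hist2d(np.log(a[keep]), b[keep], bins=20, norm=LogNorm())
    plt.xlabel('log(alpha)')
    plt.ylabel('beta')
    plt.title('sequence %d' % batch_indx)
    plt.show()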