def convert_to_npy(df=None, save=True, modalities=None):
    """Stack per-user, per-modality rows of ``df`` into a single 3-D array.

    For every user, the rows whose modality is ``'cpm'`` and that contain
    fewer than ``16 * 12`` missing values are selected. Users with fewer than
    120 such rows are skipped. For the remaining users, the rows of each
    requested modality sharing an index label with a selected ``'cpm'`` row
    are concatenated side by side and reshaped to
    ``(rows, features, modalities)``.

    Only the ``'modality'`` column is dropped before stacking, so every other
    column of ``df`` (including ``'user'``) is part of the feature axis.

    Args:
        df: Frame with ``'user'`` and ``'modality'`` columns. When ``None``
            it is read from ``data/interim/data.parq`` under ``project_dir``
            and indexed by ``'date'``.
        save: When true, the result is also written to
            ``data/interim/data.npy`` under ``project_dir``.
        modalities: Modalities to extract, in the order they should appear on
            the last axis. Defaults to every modality present in ``df``,
            sorted.

    Returns:
        ``numpy.ndarray`` of shape ``(rows, features, len(modalities))``
        holding the arrays of all retained users concatenated along axis 0.

    Raises:
        ValueError: If no user passes the selection criteria.
        KeyError: If a retained user has no rows for a requested modality.
    """
    if df is None:
        df = ParquetFile(
            os.path.join(project_dir, 'data', 'interim', 'data.parq')
        ).to_pandas().set_index('date')

    if modalities is None:
        # Iterating over the ``None`` default used to raise TypeError; fall
        # back to every modality in the frame, in sorted order.
        modalities = sorted(df['modality'].dropna().unique())
    else:
        modalities = list(modalities)

    user_data = []
    for _, group in df.groupby('user'):
        # Select activity
        activity = group[group['modality'] == 'cpm']
        # Keep rows with fewer than 16 * 12 missing values (the original
        # comment describes this as "require 8 hours of data").
        activity = activity[pd.isnull(activity).sum(axis=1) < (16 * 12)]
        if activity.modality.count() < 120:
            continue

        # Restrict every modality to the index labels of the kept 'cpm' rows.
        group = group.loc[activity.index.tolist()]
        modality_grouped = group.groupby('modality')
        modality_data = [
            modality_grouped.get_group(modality).drop(['modality'], axis=1)
            for modality in modalities
        ]

        # We concatenate on dates to ensure the same dimension across modalities
        stacked = pd.concat(modality_data, axis=1).values
        # Every modality frame has the same columns, so the per-modality
        # width is the total width divided by the number of modalities.
        n_features = stacked.shape[1] // len(modality_data)
        user_data.append(
            stacked.reshape(-1, len(modality_data), n_features).transpose(0, 2, 1))

    if not user_data:
        raise ValueError(
            "No user has at least 120 'cpm' rows with fewer than "
            "16 * 12 missing values; nothing to convert.")

    data = np.concatenate(user_data, axis=0)
    if save:
        np.save(os.path.join(project_dir, 'data', 'interim', 'data.npy'), data)
    return data
def load_all():
    """Load the interim parquet data and show a heatmap of ``data[:, :, 0]``.

    NOTE(review): ``data`` is never assigned in the visible body of this
    function, so as written the heatmap call raises ``NameError`` unless a
    module-level ``data`` exists. Presumably the code that builds ``data``
    (possibly from ``df`` / ``data_size``) was lost — confirm and restore.
    """
    df = ParquetFile(
        os.path.join(project_dir, 'data', 'interim', 'data.parq')
    ).to_pandas().set_index('date')
    # Row counts per (modality, user), pivoted so that users become columns.
    # NOTE(review): currently unused; kept in case the missing code needs it.
    data_size = df.groupby(['modality', 'user']).size().unstack()

    # NaNs are replaced by zeros before plotting.
    sns.heatmap(np.nan_to_num(data[:, :, 0]))
    # ``plt.show`` accepts ``block`` as keyword-only; passing the Axes
    # positionally raises TypeError on current Matplotlib.
    plt.show()


def npy_heatmap():
    """Load ``data/interim/data.npy`` and plot its first 100 rows.

    The array is cast to ``float32`` and the last entry along axis 1 is
    dropped (presumably the user-id column written by ``convert_to_npy`` —
    confirm). The shape is printed and a heatmap of index 0 on the last axis
    is shown.
    """
    npy_path = os.path.join(project_dir, 'data', 'interim', 'data.npy')
    data = np.load(npy_path).astype(np.float32)[:, :-1]
    print(data.shape)
    sns.heatmap(data[:100, :, 0])
    plt.show()


# Defined at module level (not only under the ``__main__`` guard) so that the
# functions in this module can resolve their data paths when it is imported.
project_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)


if __name__ == '__main__':
    # Visual sanity check for a single user: the last modality recovered from
    # the reshaped, concatenated array (left) should match that modality's
    # own frame (right).
    df = ParquetFile(
        os.path.join(project_dir, 'data', 'interim', 'data.parq')
    ).to_pandas(filters=[('user', '==', 194)]).set_index('date')

    modality_data = [
        m_group.drop(['modality', 'user'], axis=1)
        for _, m_group in df.groupby('modality')
    ]

    # We concatenate on dates to ensure the same dimension across modalities
    stacked = pd.concat(modality_data, axis=1).values
    # Derive the dimensions from the data instead of hard-coding 6 x 288.
    n_modalities = len(modality_data)
    n_features = modality_data[-1].shape[1]

    fig, ax = plt.subplots(ncols=2, figsize=(10, 30))
    sns.heatmap(stacked.reshape(-1, n_modalities, n_features)[:, -1, :], ax=ax[0])
    sns.heatmap(modality_data[-1], ax=ax[1])
    plt.show()