# Main # --------------------------------- # Read data data = pd.read_csv(path, low_memory=False, parse_dates=['date'], usecols=features) # Format data = data.convert_dtypes() data = data[features] # Add dengue interpretation data['dengue_interpretation'] = \ oucru_dengue_interpretation_feature(data, pcr=True, ns1=True, igm=True, paired_igm_igg=True, default=False) # Overall outcome for patient patients = data.groupby('study_no').max() patients['month'] = patients.date.dt.month patients['year'] = patients.date.dt.year # Show print("\nPatients:") print(patients) # Compute prevalence aux = patients.reset_index() \ .groupby([pd.Grouper(key='date', freq='M'), 'dsource']) \ .agg(prevalence=('dengue_interpretation', prevalence),
low_memory=False, #nrows=50000, usecols=usecols, parse_dates=['date']) # Reset index data = data.reset_index() # Remove columns with all NaN data = data.dropna(how='all', axis=1) data = data.dropna(how='any', subset=features) # Add dengue interpretation data['dengue_interpretation'] = \ oucru_dengue_interpretation_feature(data, pcr=True, ns1=False, igm=False, serology=False, single_igm_igg=False, paired_igm_igg=False, default=False) # Create outcome data['outcome'] = \ data.dengue_interpretation.astype(int) # Add month data['month'] = data.date.dt.month # ----------- # Format data # ----------- # Keep only day_from_admission == 0 data #data = data[data.day_from_admission.isin([0])]