# Main
# ---------------------------------
# Load the CSV at `path`, reading only the `features` columns and
# parsing `date` as datetime. convert_dtypes() then switches to pandas'
# NA-aware dtypes, and the final selection puts the columns in the
# order given by `features`.
data = (
    pd.read_csv(
        path,
        usecols=features,
        parse_dates=['date'],
        low_memory=False,
    )
    .convert_dtypes()
    [features]
)

# Add the dengue interpretation column with the PCR, NS1, IgM and
# paired IgM/IgG flags switched on.
# NOTE(review): the helper is defined elsewhere; the exact meaning of
# each flag and of `default=False` should be confirmed there.
data['dengue_interpretation'] = oucru_dengue_interpretation_feature(
    data,
    pcr=True,
    ns1=True,
    igm=True,
    paired_igm_igg=True,
    default=False,
)

# Collapse to one row per `study_no`, keeping the column-wise maximum,
# then derive the calendar month and year from the aggregated date.
# NOTE(review): presumably `study_no` identifies a patient - confirm.
patients = (
    data.groupby('study_no')
    .max()
    .assign(
        month=lambda frame: frame['date'].dt.month,
        year=lambda frame: frame['date'].dt.year,
    )
)

# Show the per-study summary
print("\nPatients:", patients, sep="\n")

# Compute prevalence
aux = patients.reset_index() \
    .groupby([pd.Grouper(key='date', freq='M'), 'dsource']) \
    .agg(prevalence=('dengue_interpretation', prevalence),
Ejemplo n.º 2
0
    low_memory=False,
    #nrows=50000,
    usecols=usecols,
    parse_dates=['date'])

# Clean the frame in one pass:
#   1. move the current index back into a regular column,
#   2. drop every column that is entirely NaN,
#   3. drop every row with a NaN in any of the `features` columns.
# NOTE(review): step 3 raises KeyError if step 2 removed one of the
# `features` columns - confirm that cannot happen for this dataset.
data = (
    data.reset_index()
    .dropna(axis='columns', how='all')
    .dropna(axis='index', how='any', subset=features)
)

# Add the dengue interpretation column with only the PCR flag enabled;
# every other flag is switched off.
# NOTE(review): the helper is defined elsewhere; the exact meaning of
# each flag and of `default=False` should be confirmed there.
data['dengue_interpretation'] = oucru_dengue_interpretation_feature(
    data,
    pcr=True,
    ns1=False,
    igm=False,
    serology=False,
    single_igm_igg=False,
    paired_igm_igg=False,
    default=False,
)

# Integer-encoded copy of the interpretation, used as the outcome
data['outcome'] = data['dengue_interpretation'].astype(int)

# Calendar month taken from the `date` column
data['month'] = data['date'].dt.month

# -----------
# Format data
# -----------

# Keep only day_from_admission == 0 data
#data = data[data.day_from_admission.isin([0])]