Example #1
                      orient='records')


def dataframe_add_key(data: pandas.DataFrame):
    ''' Adds the Key column to all rows of the dataframe '''
    data['Key'] = data.apply(compute_record_key, axis=1)
    return data.set_index('Key')
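

# NOTE: compute_record_key and series_converter are helpers defined elsewhere in
# this project and are not shown in this excerpt. The definitions below are
# hypothetical sketches (assumed names, signatures, and behavior), included only
# to illustrate what they might do.

def compute_record_key(record: pandas.Series) -> str:
    ''' Hypothetical sketch: derive a key such as "CN" or "US_CA" from the location codes '''
    country_code = record['CountryCode']
    region_code = record['RegionCode']
    if pandas.isna(region_code):
        return country_code
    return '%s_%s' % (country_code, region_code)


def series_converter(series: pandas.Series) -> pandas.Series:
    ''' Hypothetical sketch: coerce count-like columns to nullable integers, keep the rest as-is '''
    if series.name in ('Confirmed', 'Deaths', 'Population'):
        return pandas.to_numeric(series, errors='coerce').astype('Int64')
    return series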


# Root path of the project
ROOT = Path(os.path.dirname(__file__)) / '..'

# Read the full data file
data = read_csv(ROOT / 'output' / 'data.csv')
for col in data.columns:
    data[col] = series_converter(data[col])
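# Note: an equivalent column-wise form would be `data = data.apply(series_converter)`,
# since DataFrame.apply passes each column to the function as a Series by default.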

# Backwards compatibility: the legacy "china" dataset exposes Region instead of RegionName
china = data[data['CountryCode'] == 'CN']
china = china[~china['RegionCode'].isna()].copy()
dataframe_split(china, ('RegionCode', ), ROOT, 'cn')
china['Region'] = china['RegionName']
dataframe_split(china, ('RegionCode', ), ROOT, 'china')

# Backwards compatibility: the legacy "usa" dataset exposes Region instead of RegionCode
usa = data[data['CountryCode'] == 'US']
usa = usa[~usa['RegionCode'].isna()].copy()
dataframe_split(usa, ('RegionCode', ), ROOT, 'us')
usa['Region'] = usa['RegionCode']
dataframe_split(usa, ('RegionCode', ), ROOT, 'usa')
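
dataframe_split is another project helper that this excerpt does not define. A minimal sketch of what a splitter with this call signature might do, assuming it writes one CSV per value of the given key columns under the project's output folder (the file layout and behavior here are assumptions, not the project's actual implementation):

def dataframe_split(data: pandas.DataFrame, key_columns: tuple, root: Path, name: str):
    ''' Hypothetical sketch: write one CSV per unique combination of key_columns '''
    output_folder = root / 'output' / name
    output_folder.mkdir(parents=True, exist_ok=True)
    for keys, subset in data.groupby(list(key_columns)):
        keys = keys if isinstance(keys, tuple) else (keys,)
        fname = '_'.join(str(key) for key in keys) + '.csv'
        subset.to_csv(output_folder / fname)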
Example #2
    # Estimate daily counts per category, assuming the category ratios stay constant
    df['NewCases'] = df['Confirmed'].diff().astype('Int64')
    df['NewDeaths'] = df['Deaths'].diff().astype('Int64')
    df['NewMild'] = df['NewCases'] * mild_ratio
    df['NewSevere'] = df['NewCases'] * severe_ratio
    df['NewCritical'] = df['NewCases'] * critical_ratio
    df = df[['NewCases', 'NewDeaths', 'NewMild', 'NewSevere', 'NewCritical']]

    # Compute the rolling windows for count of active (current) categories
    df['CurrentlyMild'] = df['NewMild'].rolling(
        round(mild_recovery_days)).sum()
    df['CurrentlySevere'] = df['NewSevere'].rolling(
        round(severe_recovery_days)).sum()
    df['CurrentlyCritical'] = df['NewCritical'].rolling(
        round(critical_recovery_days)).sum()

    # Drop the first rows, which are not meaningful because of the rolling window
    df = df.iloc[
        round(max(mild_recovery_days, severe_recovery_days, critical_recovery_days)):]

    # Make sure all columns have the appropriate type
    for col in df.columns:
        df[col] = series_converter(df[col])

    # Output resulting dataframe
    if print_header_flag:
        df.to_csv(sys.stdout)
        print_header_flag = False
    else:
        df.to_csv(sys.stdout, header=False)
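
The rolling sums above estimate how many of the cases opened over the last N days are still active in each category. A small self-contained illustration of that idea with toy numbers:

import pandas

# Toy series of new daily cases; assume a case stays active for 3 days
new_cases = pandas.Series([10, 20, 30, 40, 50, 60])
currently_active = new_cases.rolling(3).sum()
print(currently_active.tolist())
# [nan, nan, 60.0, 90.0, 120.0, 150.0] -> each value sums the last 3 days of new cases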
Example #3
    # Early exit: skip subsets too short to yield DATAPOINT_COUNT output datapoints
    if len(subset) < DATAPOINT_COUNT - PREDICT_WINDOW: continue

    # Perform forecast
    forecast_data = compute_forecast(subset['Confirmed'], PREDICT_WINDOW)

    # Capture only the last DATAPOINT_COUNT days
    forecast_data = forecast_data.sort_index().iloc[-DATAPOINT_COUNT:]

    # Fill out the corresponding index in the output forecast
    for idx in forecast_data.index:
        df_forecast.loc[(idx, key), 'ForecastDate'] = forecast_date
        df_forecast.loc[(idx, key),
                        'Estimated'] = '%.03f' % forecast_data.loc[idx]
        if idx in subset.index:
            df_forecast.loc[(idx, key),
                            'Confirmed'] = int(subset.loc[idx, 'Confirmed'])

# Clean up the forecast output: flatten the index and order the columns
data = df_forecast.reset_index()
forecast_columns = ['ForecastDate', 'Date', 'Key', 'Estimated', 'Confirmed']
data = data.sort_values(['Key', 'Date'])[forecast_columns]

# Make sure the core columns have the right data type
for col in data.columns:
    data[col] = series_converter(data[col])

# Output resulting dataframe
data.to_csv(sys.stdout, index=False)
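
The forecast table above is filled cell-by-cell through a (Date, Key) MultiIndex. A small self-contained illustration of that .loc pattern with toy values (df_forecast in the real code is built elsewhere with this kind of index):

import pandas

# Build an empty frame indexed by (Date, Key), mirroring the structure used above
index = pandas.MultiIndex.from_product(
    [['2020-03-01', '2020-03-02'], ['US_CA']], names=['Date', 'Key'])
demo = pandas.DataFrame(index=index, columns=['Estimated', 'Confirmed'])

# Assign one cell at a time using a (Date, Key) tuple, as the loop above does
demo.loc[('2020-03-01', 'US_CA'), 'Estimated'] = '%.03f' % 123.4567
demo.loc[('2020-03-02', 'US_CA'), 'Confirmed'] = 456
print(demo)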