Esempio n. 1
0
def main(database=MASTER_DATABASE,
         siteData=SITE_DATA,
         bundleOutputFileName=DEFAULT_OUTPUT_JSON_FILE_NAME):
    """Build the 'confirmed'/'deaths' bundle and write it to a JSON file.

    Every record in the database that carries the 'info', 'confirmed' and
    'deaths' attributes and whose 'info' has a non-empty 'region' is handed
    to ``_applyCountFor`` once per case type.

    Parameters
    ----------
    database: Path handed to Cryostation
    siteData: Directory in which the bundle file is written
    bundleOutputFileName: Name of the JSON file inside siteData

    Returns
    -------
    (bundle, bundleFileName): the bundle dictionary and the path it was
    written to
    """
    casesType = (
        'confirmed',
        'deaths',
    )
    requiredAttributes = ('info', ) + casesType
    bundle = {caseType: {} for caseType in casesType}

    # The context manager releases the database even when a record raises,
    # which the explicit close() at the end of the loop did not guarantee.
    with Cryostation(database) as cryostation:
        for element in cryostation.items():
            country = element[1]

            if not all(r in country for r in requiredAttributes):
                continue

            # Records without a region cannot be attributed to any bucket
            if not country['info'].get('region'):
                continue

            for caseType in casesType:
                _applyCountFor(bundle, country, caseType)

    bundleFileName = os.path.join(siteData, bundleOutputFileName)
    with open(bundleFileName, 'w') as outputStream:
        json.dump(bundle, outputStream)

    return bundle, bundleFileName
Esempio n. 2
0
def _updateUSData():
    """Store today's scraped US cases and deaths in the 'US' database record.

    The 'Cases' and 'Deaths' columns are fetched and homologized against
    US_STATE_NAMES, then written under the SCRAPED_TODAY key of each
    province's 'confirmed' and 'deaths' series.  Locations listed in
    NIXED_ROWS_INDEX are skipped; locations that cannot be resolved are
    reported and skipped.
    """
    # 'Cases' -- TSV ref
    # 'confirmed' -- VirusTrack DB key
    print('  updating US...')
    updateUSCases = _fetchCurrentUpdatesUS(columnRef='Cases')
    updateUSCases = _homologizeUpdateData(updateUSCases, US_STATE_NAMES)
    updateUSDeaths = _fetchCurrentUpdatesUS(columnRef='Deaths')
    updateUSDeaths = _homologizeUpdateData(updateUSDeaths, US_STATE_NAMES)

    # The context manager closes the database even if the update is aborted
    with Cryostation(MASTER_DATABASE) as cryostation:
        country = cryostation['US']

        for location in tqdm.tqdm(sorted(updateUSCases.keys())):
            if location in NIXED_ROWS_INDEX:
                # TODO:  Eugene - what do we do about these uncharted locations?
                continue

            try:
                province = country['provinces'][location]
                province['confirmed'][SCRAPED_TODAY] = updateUSCases[
                    location][SCRAPED_TODAY]
                province['deaths'][SCRAPED_TODAY] = updateUSDeaths[
                    location][SCRAPED_TODAY]
            except (KeyError, TypeError):
                # Only lookup failures are expected here; anything else
                # (including KeyboardInterrupt) must not be swallowed.
                print('  || Invalid location: %s' % location)
                continue

        cryostation['US'] = country
Esempio n. 3
0
def makeTestDatabase():
    """Populate the temporary test database with a small slice of the master.

    Copies the 'confirmed' series of the US (plus California, New York and
    New Jersey), Italy and the United Kingdom from MASTER_DATABASE into
    TEMP_DATABASE_NAME.
    """
    stateNames = ('California', 'New York', 'New Jersey')
    countryNames = ('Italy', 'United Kingdom')

    # First pass: the US record together with three of its provinces
    with Cryostation(TEMP_DATABASE_NAME) as cryostationTest, \
            Cryostation(MASTER_DATABASE) as cryostation:
        unitedStates = cryostation['US']

        provinces = {}
        for stateName in stateNames:
            provinces[stateName] = {
                'confirmed': unitedStates['provinces'][stateName]['confirmed'],
            }

        cryostationTest['US'] = {
            'confirmed': unitedStates['confirmed'],
            'provinces': provinces,
            'key': 'US',
        }

    # Second pass: both databases are reopened for the country-only records
    with Cryostation(TEMP_DATABASE_NAME) as cryostationTest, \
            Cryostation(MASTER_DATABASE) as cryostation:
        # Read every record before writing any, as a failed read must leave
        # the test database without partial country entries.
        records = {
            countryName: {
                'confirmed': cryostation[countryName]['confirmed'],
                'key': countryName,
            }
            for countryName in countryNames
        }

        for countryName, record in records.items():
            cryostationTest[countryName] = record
Esempio n. 4
0
def _updateWorldData():
    """Store today's scraped cases and deaths for every known country.

    The 'Cases' and 'Deaths' columns are fetched and homologized against
    COUNTRY_NAMES, then written under the SCRAPED_TODAY key of each
    country's 'confirmed' and 'deaths' series.  Countries that are absent
    from the database are reported and skipped.
    """
    # 'Cases' -- TSV ref
    # 'confirmed' -- VirusTrack DB key
    print('  updating world...')

    updateWorldCases = _fetchCurrentUpdates('Cases')
    updateWorldCases = _homologizeUpdateData(updateWorldCases, COUNTRY_NAMES)
    updateWorldDeaths = _fetchCurrentUpdates('Deaths')
    updateWorldDeaths = _homologizeUpdateData(updateWorldDeaths, COUNTRY_NAMES)

    # The context manager closes the database even if the update is aborted
    with Cryostation(MASTER_DATABASE) as cryostation:
        for countryName in tqdm.tqdm(sorted(updateWorldCases.keys())):
            if countryName not in cryostation:
                print('## country %s not found in database' % countryName)
                continue

            country = cryostation[countryName]
            try:
                country['confirmed'][SCRAPED_TODAY] = updateWorldCases[
                    countryName][SCRAPED_TODAY]
                country['deaths'][SCRAPED_TODAY] = updateWorldDeaths[
                    countryName][SCRAPED_TODAY]
            except KeyError as missingKey:
                # TODO: Eugene - Define a mechanism to add new countries reporting to the database
                # Still best-effort, but no longer silent: whatever was
                # assigned before the failure is kept and written below.
                print('## country %s: update incomplete, missing key %s'
                      % (countryName, missingKey))
            cryostation[countryName] = country
Esempio n. 5
0
def packGlobal(siteData = SITE_DATA):
    """Write the 'confirmed' and 'deaths' series of every country to JSON.

    Only records that carry a 'confirmed' entry are included.  The output
    is written to BUNDLE_GLOBAL_JSON inside siteData.

    Parameters
    ----------
    siteData: Directory in which the bundle file is written
    """
    bundle = { 'confirmed': dict(), 'deaths': dict(), }

    # The context manager closes the database even if a record is malformed
    with Cryostation(MASTER_DATABASE) as cryostation:
        for key in tqdm.tqdm(cryostation.keys()):
            # Fetch the record once instead of once per attribute access
            country = cryostation[key]
            if 'confirmed' in country:
                bundle['confirmed'][key] = country['confirmed']
                bundle['deaths'][key] = country['deaths']

    fileName = os.path.join(siteData, BUNDLE_GLOBAL_JSON)
    with open(fileName, 'w') as outputStream:
        json.dump(bundle, outputStream)
Esempio n. 6
0
def computeGrowth(regionType = 'country',
                  countryName = None,
                  casesType = 'confirmed',
                  windowSize = WINDOW_SIZE,
                  disableProgressBar = True,
                  todayDate = TODAY_DATE,
                  databasePath = MASTER_DATABASE,
                  ):
    """Compute growth gauge data and store it in the database.

    Parameters
    ----------
    regionType: 'country' or 'province'; selects where the growth is stored
    countryName: Forwarded to timeSeriesFor and, for provinces, to the writer
    casesType: Forwarded to timeSeriesFor
    windowSize: Forwarded to _computeGrowthFor
    disableProgressBar: Disables the progress bars of the called helpers
    todayDate: Forwarded to _getGrowthGaugeData
    databasePath: Path handed to Cryostation and to the writers

    Raises
    ------
    ValueError: if regionType is neither 'country' nor 'province'
    """
    with Cryostation(databasePath) as cryostation:
        print('Loading time series for countries...')
        regions = cryostation.timeSeriesFor(regionType         = regionType,
                                            countryName        = countryName,
                                            casesType          = casesType,
                                            disableProgressBar = disableProgressBar,
                                            )

    growth = _computeGrowthFor(regions, windowSize = windowSize)

    growthGaugeData = _getGrowthGaugeData(growth, todayDate)

    print('Computing/writing growth factors...')
    if regionType == 'country':
        _appendGrowthToCountries(growthGaugeData,
                                 databasePath,
                                 disableProgressBar = disableProgressBar)
    elif regionType == 'province':
        # Honour the caller's progress-bar choice; this branch used to force
        # the bar off regardless of the argument.
        _appendGrowthToProvinces(growthGaugeData,
                                 databasePath,
                                 countryName = countryName,
                                 disableProgressBar = disableProgressBar)
    else:
        raise ValueError(f'regionType = {regionType} not understood')
    print('Done.')
Esempio n. 7
0
def _appendGrowthToCountries(growthGaugeDataCountries, databasePath, disableProgressBar=True):
    """Store the growth gauges of each country under its 'growth' key.

    Parameters
    ----------
    growthGaugeDataCountries: Mapping of gauge name to a container indexed
        by country name; the 'yesterday' entry supplies the country list
    databasePath: Path handed to Cryostation
    disableProgressBar: Forwarded to tqdm as ``disable``
    """
    allNames = list(growthGaugeDataCountries['yesterday'].index)

    for countryName in tqdm(allNames, disable=disableProgressBar):
        # The database is opened and closed once per country
        with Cryostation(databasePath) as storage:
            record = storage[countryName]

            gauges = {}
            for gaugeName in growthGaugeDataCountries.keys():
                gauges[gaugeName] = growthGaugeDataCountries[gaugeName][countryName]

            record['growth'] = gauges
            storage[countryName] = record
Esempio n. 8
0
def packRegions(countryName = 'US', siteData = SITE_DATA):
    """Write the 'confirmed' and 'deaths' series of a country's regions to JSON.

    Only regions that carry a 'confirmed' entry are included.  The output
    is always written to BUNDLE_US_REGIONS_JSON inside siteData, whatever
    countryName is.

    Parameters
    ----------
    countryName: Database key of the country whose regions are bundled
    siteData: Directory in which the bundle file is written
    """
    bundle = { 'confirmed': dict(), 'deaths': dict(), }

    # The context manager closes the database even if the lookup fails
    with Cryostation(MASTER_DATABASE) as cryostation:
        regions = cryostation[countryName]['regions']

        for regionName in regions:
            region = regions[regionName]
            if 'confirmed' in region:
                bundle['confirmed'][regionName] = region['confirmed']
                bundle['deaths'][regionName] = region['deaths']

    fileName = os.path.join(siteData, BUNDLE_US_REGIONS_JSON)
    with open(fileName, 'w') as outputStream:
        json.dump(bundle, outputStream)
Esempio n. 9
0
def _main(
        siteDataDirectory=SITE_RESOURCES,
        database=MASTER_DATABASE,
        nStateLimit=1000,  # unreachable "infinite" limit
):
    """Record the total hospital beds count of each US state in the database.

    Parameters
    ----------
    siteDataDirectory: Not used by this function body
    database: Path handed to Cryostation
    nStateLimit: Stop once this many states have been updated

    Returns
    -------
    The updated 'US' record
    """
    print('vuhospitals - getting the total hospital beds count per state')

    with Cryostation(database) as storage:
        country = storage['US']
        postCodes = country['provinceCodes']

        nUpdated = 0
        for state in tqdm(postCodes.keys()):
            # States with a postal code but no province record are skipped
            if state not in country['provinces']:
                continue

            postalCode = postCodes[state]['postalCode']
            bedsCount = _getTotalBedsForPostalCode(postalCode)
            country['provinces'][state]['hospitalBedsCount'] = bedsCount

            # Artificial break for unit tests
            nUpdated += 1
            if nUpdated == nStateLimit:
                break

        storage['US'] = country

        return country
Esempio n. 10
0
def test__appendGrowthToCountries():
    """Check that _appendGrowthToCountries stores a 'growth' dict per country."""
    try:
        growthGaugeData = test__getGrowthGaugeData()
        _appendGrowthToCountries(growthGaugeData, TEMP_DATABASE_NAME)
        with Cryostation(TEMP_DATABASE_NAME) as cryostationTest:
            for countryName in ('US', 'Italy', 'United Kingdom'):
                assert isinstance(cryostationTest[countryName]['growth'], dict)
    finally:
        # Clean up even when an assertion fails, so a stale temporary
        # database cannot leak into later tests.
        if os.path.exists(TEMP_DATABASE_NAME):
            os.remove(TEMP_DATABASE_NAME)
Esempio n. 11
0
def _appendGrowthToProvinces(growthGaugeDataProvinces, databasePath, countryName = 'US', disableProgressBar=True):
    """Store the growth gauges of each province under its 'growth' key.

    Parameters
    ----------
    growthGaugeDataProvinces: Mapping of gauge name to a container indexed
        by province name; the 'yesterday' entry supplies the province list
    databasePath: Path handed to Cryostation
    countryName: Database key of the country owning the provinces
    disableProgressBar: Forwarded to tqdm as ``disable``
    """
    provinceNameAll = list(growthGaugeDataProvinces['yesterday'].index)

    # provinceNameAll is a plain list: iterate it directly.  The previous
    # ``provinceNameAll.columns`` raised AttributeError on every call.
    for stateName in tqdm(provinceNameAll, disable=disableProgressBar):
        with Cryostation(databasePath) as cryostation:
            country = cryostation[countryName]
            province = country['provinces'][stateName]
            province['growth'] = {d: growthGaugeDataProvinces[d][stateName]
                                  for d in growthGaugeDataProvinces}
            country['provinces'][stateName] = province
            cryostation[countryName] = country
Esempio n. 12
0
def _bundleHospitalBeds(countryName = 'US'):
    """Collect the 'hospitalBedsCount' of every province that has one.

    Parameters
    ----------
    countryName: Database key of the country whose provinces are scanned

    Returns
    -------
    Dictionary mapping province name to its 'hospitalBedsCount'
    """
    bundle = dict()

    with Cryostation(MASTER_DATABASE) as cryostation:
        # Read the country record once; it used to be fetched from the
        # database again for every membership test and value access.
        provinces = cryostation[countryName]['provinces']

        for state in tqdm.tqdm(provinces.keys()):
            province = provinces[state]
            if 'hospitalBedsCount' in province:
                bundle[state] = province['hospitalBedsCount']

    return bundle
Esempio n. 13
0
def updateDatabaseWith(dataset):
    """Write today's county-level counts from dataset into the 'US' record.

    Parameters
    ----------
    dataset: Nested mapping read as dataset[state][county]['confirmed'] and
        dataset[state][county]['deaths']; values must be convertible to float

    Counties missing from dataset, or whose values cannot be converted, are
    left untouched.  If 'confirmed' is written and 'deaths' then fails, the
    'confirmed' value is kept.
    """
    with Cryostation(MASTER_DATABASE) as cryostation:
        country = cryostation['US']

    for state, province in country['provinces'].items():
        if 'counties' not in province:
            continue

        for county, countyRecord in province['counties'].items():
            try:
                countyRecord['confirmed'][SCRAPED_TODAY] = float(
                    dataset[state][county]['confirmed'])
                countyRecord['deaths'][SCRAPED_TODAY] = float(
                    dataset[state][county]['deaths'])
            except (KeyError, TypeError, ValueError):
                # Missing or non-numeric data for this county: skip it.  A
                # bare except here also swallowed KeyboardInterrupt.
                continue

    # The database is reopened only for the write-back
    with Cryostation(MASTER_DATABASE) as cryostation:
        cryostation['US'] = country
Esempio n. 14
0
def test_computeGrowth():
    """Check that computeGrowth stores a 'growth' dict for each test country."""
    try:
        makeTestDatabase()
        computeGrowth(regionType='country',
                      casesType='confirmed',
                      todayDate=TEST_TODAY_DATE,
                      databasePath=TEMP_DATABASE_NAME)
        with Cryostation(TEMP_DATABASE_NAME) as cryostationTest:
            for countryName in ('US', 'Italy', 'United Kingdom'):
                assert isinstance(cryostationTest[countryName]['growth'], dict)
    finally:
        # Clean up even when an assertion fails, so a stale temporary
        # database cannot leak into later tests.
        if os.path.exists(TEMP_DATABASE_NAME):
            os.remove(TEMP_DATABASE_NAME)
Esempio n. 15
0
def packCountry(countryName = 'US', siteData = SITE_DATA):
    """Write a country's province series, counties and hospital beds to JSON.

    The bundle holds the 'confirmed' and 'deaths' series of every province
    that has a 'confirmed' entry, the 'counties' of every province that has
    them, and the result of _bundleHospitalBeds.  The output is always
    written to BUNDLE_US_JSON inside siteData, whatever countryName is.

    Parameters
    ----------
    countryName: Database key of the country to bundle
    siteData: Directory in which the bundle file is written
    """
    print('  processing states')
    bundle = { 'confirmed': dict(), 'deaths': dict(), 'allCounties': dict(), }

    # The context manager closes the database even if the lookup fails, and
    # before _bundleHospitalBeds opens it again below.
    with Cryostation(MASTER_DATABASE) as cryostation:
        provinces = cryostation[countryName]['provinces']

        for state in provinces:
            province = provinces[state]
            if 'confirmed' in province:
                bundle['confirmed'][state] = province['confirmed']
                bundle['deaths'][state] = province['deaths']
            if 'counties' in province:
                bundle['allCounties'][state] = province['counties']

    print('  processing hospital beds counts by state')
    bundle['hospitalBeds'] = _bundleHospitalBeds(countryName)

    fileName = os.path.join(siteData, BUNDLE_US_JSON)
    with open(fileName, 'w') as outputStream:
        json.dump(bundle, outputStream)
Esempio n. 16
0
def test_plotDataAndPredictionsWithCI():
    """Smoke test: plotDataAndPredictionsWithCI runs on stored predictions."""
    predictionsDirectory = join(TEST_SITE_DATA, 'test-predictions')
    meanPredictionTSAll, percentilesTSAll = loadAll(siteData=predictionsDirectory)

    with Cryostation(REAL_DATABASE_FILE_NAME) as storage:
        confirmedCasesAll = storage.timeSeriesFor()  # take defaults

    regionsToPlot = ['Albania', 'Algeria']

    # Only successful completion is checked; the return value is discarded
    plotDataAndPredictionsWithCI(meanPredictionTSAll,
                                 confirmedCasesAll,
                                 percentilesTSAll,
                                 regionsToPlot)
Esempio n. 17
0
def _updateUSRegionsData(target):
    """Aggregate today's province values into the US regions of the database.

    Parameters
    ----------
    target: Database key of the series to aggregate, e.g. 'confirmed'

    For every province outside NIXED_ROWS_INDEX the value stored under
    SCRAPED_TODAY is added to the total of its region (US_REGIONS).  When a
    province has no usable value for today, the value stored under the
    second-to-last date key of the TOTAL_US_NAME series is used instead.
    """
    # 'confirmed' -- VirusTrack DB key
    print('  updating US regions...')
    updateUSRegions = dict()

    # The context manager closes the database even if the update is aborted
    with Cryostation(MASTER_DATABASE) as cryostation:
        country = cryostation['US']
        allTime = list(country['provinces'][TOTAL_US_NAME][target].keys())

        for location in tqdm.tqdm(country['provinces']):
            if location in NIXED_ROWS_INDEX:
                continue
            try:
                region = US_REGIONS[location]
                if region not in updateUSRegions:
                    updateUSRegions[region] = {
                        SCRAPED_TODAY: 0.0,
                    }

                series = country['provinces'][location][target]
                try:
                    count = float(series[SCRAPED_TODAY])
                except (KeyError, TypeError, ValueError):
                    # NOTE(review): presumably the second-to-last key is
                    # yesterday - confirm the key ordering of the series.
                    # The lookup now goes through [target]; without it the
                    # fallback always raised KeyError.
                    count = float(series[allTime[len(allTime) - 2]])

                # Accumulate in both cases: assigning the fallback value
                # would discard what the region had already summed up.
                updateUSRegions[region][SCRAPED_TODAY] += count
            except (KeyError, TypeError, ValueError):
                print('  >> Invalid location: %s' % location)
                continue

        for region in sorted(updateUSRegions.keys()):
            country['regions'][region][target][SCRAPED_TODAY] = updateUSRegions[
                region][SCRAPED_TODAY]

        cryostation['US'] = country
Esempio n. 18
0
def test_predictCountries():
    """Run predictRegions for one country, one US state and all countries.

    Each run must leave valid JSON prediction files in TEST_SITE_DATA; every
    '.json' file there is purged afterwards, whether the test passes or not.
    """
    try:
        # Arguments common to the three predictRegions calls
        sharedArguments = dict(
            nDaysPredict=10,
            siteData=TEST_SITE_DATA,
            logGrowthModel=logGrowthModel,
            nSamples=TEST_N_SAMPLES,
            nChains=TEST_N_CHAINS,
            databasePath=TEST_DATABASE_PATH,
        )

        predictRegions('US', **sharedArguments)
        _assertValidJSON(join(TEST_SITE_DATA,'prediction-world-mean-US.json'))
        _assertValidJSON(join(TEST_SITE_DATA, 'prediction-world-conf-int-US.json'))

        predictRegions('Alabama', regionType='stateUS', **sharedArguments)
        # NOTE(review): these are the same world/US files checked above, not
        # an Alabama output - confirm whether that is intended.
        _assertValidJSON(join(TEST_SITE_DATA, 'prediction-world-mean-US.json'))
        _assertValidJSON(join(TEST_SITE_DATA, 'prediction-world-conf-int-US.json'))

        nLimitRegions = 2

        with Cryostation(TEST_DATABASE_PATH) as storage:
            countries = storage.allCountryNames()

        predictRegions('all',
                       regionType='country',
                       nLimitRegions=nLimitRegions,
                       **sharedArguments)

        for position in range(nLimitRegions):
            meanFileName = f'prediction-world-mean-{countries[position]}.json'
            _assertValidJSON(join(TEST_SITE_DATA, meanFileName))
    finally:
        # Failures propagate unchanged; the generated files are always removed
        _purge(TEST_SITE_DATA, '.json')
Esempio n. 19
0
def test__computeGrowthFor():
    """Check the last growth factor of the first region against a manual value.

    Returns
    -------
    The growth DataFrame produced by _computeGrowthFor
    """
    makeTestDatabase()
    with Cryostation(TEMP_DATABASE_NAME) as storage:
        print('Loading time series for countries...')
        regions = storage.timeSeriesFor(regionType='country',
                                        casesType='confirmed')

    growth = _computeGrowthFor(regions, WINDOW_SIZE)
    assert isinstance(growth, DataFrame)

    def _windowMean(lag):
        # Mean of the first column over the WINDOW_SIZE rows ending lag rows
        # before the end of the series
        stop = -lag if lag else None
        return regions.iloc[-WINDOW_SIZE - lag:stop, 0].mean()

    latest, previous, earliest = (_windowMean(lag) for lag in range(3))

    # Ratio of the two most recent differences between smoothed values
    growthFactorExpectedFinal = (latest - previous) / (previous - earliest)
    assert abs(growth.iloc[-1, 0] - growthFactorExpectedFinal) < 1e-4

    return growth
Esempio n. 20
0
def predictLogisticGrowth(logGrowthModel: StanModel,
                          regionName,
                          target                 = 'confirmed',
                          regionType             = 'country',
                          nSamples               = N_SAMPLES,
                          nChains                = N_CHAINS,
                          nDaysPredict           = N_DAYS_PREDICT,
                          minCasesFilter         = MIN_CASES_FILTER,
                          minNumberDaysWithCases = MIN_NUMBER_DAYS_WITH_CASES,
                          predictionsPercentiles = PREDICTIONS_PERCENTILES,
                          randomSeed             = 2020,
                          databasePath           = DATABASE_PATH,
                          maxTreeDepth           = MAX_TREEDEPTH,
                          ):
    """Fit the logistic growth model to one region and predict ahead.

    Parameters
    ----------
    logGrowthModel: A compiled pystan model
    regionName: Name of the region to train, which must be a country or US state in Cryostation
    target: 'confirmed' or 'deaths'
    regionType: 'country' or 'stateUS'
    nSamples: Number of samples per chain of MCMC
    nChains: Number of independent chains MCMC
    nDaysPredict: Number of days ahead to predict
    minCasesFilter: Minimum number of cases for prediction
    minNumberDaysWithCases: Minimum number of days with at least minCasesFilter
    predictionsPercentiles: Bayesian confidence intervals to evaluate
    randomSeed: Seed for stan sampler
    databasePath: Path to virustrack.db
    maxTreeDepth: max_treedepth for pystan

    Returns
    -------
    None when the region has no `target` series, never exceeds
    minCasesFilter, or has fewer than minNumberDaysWithCases days from the
    first day above the filter.  Otherwise a dictionary with the keys:

    regionTS: All data for the queried region
    predictionsMeanTS: Posterior mean prediction
    predictionsPercentilesTS: Posterior percentiles
    trace: Posterior samples, as returned by the fit's to_dataframe()
    regionTSClean: Data used for training
    regionName: The queried region name
    t: Day offsets of the training data, starting at 0

    Raises
    ------
    NotImplementedError: if regionType is neither 'country' nor 'stateUS'
    """
    with Cryostation(databasePath) as storage:
        if regionType == 'country':
            record = storage[regionName]
        elif regionType == 'stateUS':
            record = storage['US']['provinces'][regionName]
        else:
            raise NotImplementedError(f'regionType = {regionType} not understood')

        if target not in record:
            return None
        regionTS = pd.Series(record[target])

    regionTS.index = pd.to_datetime(regionTS.index)
    regionTS.sort_index(inplace=True)

    # argmax() of an all-False mask is 0, which used to select the whole
    # series for training even though no day exceeds the filter.
    aboveFilter = (regionTS > minCasesFilter).values
    if not aboveFilter.any():
        return None

    # Position of the first day above the filter; the numpy argmax is
    # positional whatever the pandas version.
    minIndex = aboveFilter.argmax()
    regionTSClean = regionTS.iloc[minIndex:]
    if regionTSClean.shape[0] < minNumberDaysWithCases:
        return None

    # Days elapsed since the first training day (the first diff is NaT -> 0)
    t = regionTSClean.index.to_series().diff().map(lambda d: d.days).fillna(0).cumsum().values
    # log(x + 1) keeps zero counts finite
    regionTSCleanLog = np.log(regionTSClean.values + 1)

    logisticGrowthData = {'nDays': regionTSClean.shape[0],
                          't': list(t),
                          'casesLog': list(regionTSCleanLog)
                          }

    fit = logGrowthModel.sampling(data=logisticGrowthData,
                                  iter=nSamples,
                                  chains=nChains,
                                  seed=randomSeed,
                                  control={'max_treedepth': maxTreeDepth},
                                  )

    trace = fit.to_dataframe()

    predictionsMean, predictionsPercentilesTS = _getPredictionsFromPosteriorSamples(t,
                                                                                    trace,
                                                                                    nDaysPredict,
                                                                                    predictionsPercentiles,
                                                                                    )

    predictionsMeanTS, predictionsPercentilesTS = _castPredictionsAsTS(regionTSClean,
                                                                       nDaysPredict,
                                                                       predictionsMean,
                                                                       predictionsPercentilesTS,
                                                                       )

    prediction = {
        'regionTS':                 regionTS,
        'predictionsMeanTS':        predictionsMeanTS,
        'predictionsPercentilesTS': predictionsPercentilesTS,
        'trace':                    trace,
        'regionTSClean':            regionTSClean,
        'regionName':               regionName,
        't':                        t,
    }

    return prediction
Esempio n. 21
0
def predictRegions(regionName,
                   regionType='country',
                   target='confirmed',
                   predictionsPercentiles=PREDICTIONS_PERCENTILES,
                   siteData=SITE_DATA,
                   priorLogCarryingCapacity=PRIOR_LOG_CARRYING_CAPACITY,
                   priorMidPoint=PRIOR_MID_POINT,
                   priorGrowthRate=PRIOR_GROWTH_RATE,
                   priorSigma=PRIOR_SIGMA,
                   logGrowthModel=None,
                   databasePath=DATABASE_PATH,
                   nLimitRegions=None,
                   **kwargs
                   ):
    """Generate forecasts for regions

    Parameters
    ----------
    regionName: A country or US state key of Cryostation, or 'all'
    regionType: 'country' or 'stateUS'
    target: 'confirmed' or 'deaths'
    predictionsPercentiles: The posterior percentiles to compute
    siteData: The directory for output data
    priorLogCarryingCapacity: Forwarded to buildLogisticModel
    priorMidPoint: Forwarded to buildLogisticModel
    priorGrowthRate: Forwarded to buildLogisticModel
    priorSigma: Forwarded to buildLogisticModel
    logGrowthModel: A compiled pystan model; built here when None
    databasePath: Path to virustrack.db, used both to list the regions and
        to read the series being predicted
    nLimitRegions: Maximum number of regions to train when regionName is
        'all', in the order the database lists them; None or 0 means no limit
    kwargs: Optional named arguments for covidvu.predictLogisticGrowth

    Returns
    -------
    None; predictions are written through _dumpRegionPrediction

    Raises
    ------
    ValueError: if regionName is 'all' and regionType is not understood
    """
    if logGrowthModel is None:
        print('Building model. This may take a few moments...')
        logGrowthModel = buildLogisticModel(priorLogCarryingCapacity=priorLogCarryingCapacity,
                                            priorMidPoint=priorMidPoint,
                                            priorGrowthRate=priorGrowthRate,
                                            priorSigma=priorSigma,
                                            )
        print('Done.')
    else:
        assert isinstance(logGrowthModel, StanModel)

    # Output file-name constants for each supported region type
    outputFilenames = {
        'country': (PREDICTION_MEAN_JSON_FILENAME_WORLD,
                    PREDICTION_CI_JSON_FILENAME_WORLD),
        'stateUS': (PREDICTION_MEAN_JSON_FILENAME_US,
                    PREDICTION_CI_JSON_FILENAME_US),
    }.get(regionType)

    def _predictAndDump(name):
        # Train a single region and write its prediction files
        print(f'Training {name}')
        prediction = predictLogisticGrowth(logGrowthModel,
                                           name,
                                           regionType=regionType,
                                           predictionsPercentiles=predictionsPercentiles,
                                           target=target,
                                           # Read from the database the caller
                                           # asked for, not the default one
                                           databasePath=databasePath,
                                           **kwargs
                                           )
        if outputFilenames is not None:
            meanFilename, confIntFilename = outputFilenames
            _dumpRegionPrediction(prediction, siteData, predictionsPercentiles,
                                  meanFilename=meanFilename,
                                  confIntFilename=confIntFilename, )

    if regionName == 'all':
        if outputFilenames is None:
            raise ValueError(f'regionType = {regionType} not understood')

        with Cryostation(databasePath) as cs:
            if regionType == 'country':
                allRegionNames = cs.allCountryNames()
            else:
                allRegionNames = cs.allProvincesOf('US')

        for i, name in enumerate(allRegionNames):
            # Same limit for both region types: exactly nLimitRegions regions
            # are trained (the stateUS branch used to train one extra).
            if nLimitRegions and i >= nLimitRegions:
                break
            _predictAndDump(name)
            print('Done.')
    else:
        _predictAndDump(regionName)

    print('Done.')