예제 #1
0
def loadData(filename=None,
             useThuringia=True,
             pullData=False,
             lastDate=None,
             correctDeaths=False,
             UseRefDead=True):
    """Load the measured case data and attach population information.

    The data comes from one of three sources, selected by the flags:

    * ``useThuringia=True``: the Thuringia line list (an Excel file) is read
      and binned with ``binThuringia``.
    * ``useThuringia=False, pullData=True``: the German data is fetched via
      ``fetch_data.DataFetcher().fetch_german_data()``, processed with
      ``imputation`` and cached as ``Data/AllMeasured.npy`` below the base
      path.
    * ``useThuringia=False, pullData=False``: the cached
      ``Data/AllMeasured.npy`` is loaded.

    The base path is the current working directory, or its parent when the
    working directory itself is named ``Examples``.

    Args:
        filename: Name of the line-list Excel file inside the hard-coded
            Thuringia directory. Defaults to
            ``"COVID-19 Linelist 2020_04_22.xlsx"``. An absolute path is
            used as is (``os.path.join`` semantics). Only used when
            ``useThuringia`` is true.
        useThuringia: Select the Thuringia line list instead of the German
            data set.
        pullData: For the German data set, fetch fresh data instead of
            loading the cached file.
        lastDate: Forwarded to ``binThuringia``; only used when
            ``useThuringia`` is true.
        correctDeaths: When pulling data, overwrite the death columns of the
            fetched data and append the records from
            ``~/Dokumente/RKI-Daten/Deaths_RKI_Format_new.csv``. Also
            forwarded to ``imputation``.
        UseRefDead: Forwarded to ``imputation`` as ``useRefDead``.

    Returns:
        The ``AllMeasured`` mapping with the additional entries ``'Region'``
        and ``'Population'``. ``'Population'`` stacks the age-weighted male
        and female populations along a new last axis.

    Errors raised by the underlying readers (missing files etc.) propagate
    unchanged.
    """
    import os

    basePath = os.getcwd()
    # When started from inside the Examples folder, step up to its parent.
    # Comparing the last path component avoids mangling directories whose
    # name merely ends in "Examples".
    if os.path.basename(basePath) == 'Examples':
        basePath = os.path.dirname(basePath)

    # Supporting information about the population, shared by all branches
    # that build AllMeasured from raw data.
    populationFile = os.path.join(basePath, "Examples", "bev_lk.xlsx")

    if useThuringia:
        if filename is None:
            filename = r"COVID-19 Linelist 2020_04_22.xlsx"
        # NOTE(review): machine-specific location of the line list.
        basePathT = r"C:\Users\pi96doc\Documents\Anträge\Aktuell\COVID_Dickmann_2020\PetraDickmann"
        Thuringia = pd.read_excel(os.path.join(basePathT, filename))
        Thuringia = stripQuotesFromAxes(Thuringia)
        AllMeasured, day1, numdays = binThuringia(Thuringia, lastDate=lastDate)
        AllMeasured['Region'] = "Thuringia"

        df = pd.read_excel(populationFile)
        # adds the supplemental information
        AllMeasured.update(addOtherData(Thuringia, df, day1, numdays))
    else:
        cacheFile = os.path.join(basePath, 'Data', 'AllMeasured.npy')
        if pullData:
            data = fetch_data.DataFetcher().fetch_german_data()
            if correctDeaths:
                # Discard the fetched death information and append the
                # corrected death records instead.
                data['AnzahlTodesfall'] = 0
                data['NeuerTodesfall'] = -9
                correct_deaths = pd.read_csv(
                    os.path.join('~', 'Dokumente', 'RKI-Daten',
                                 'Deaths_RKI_Format_new.csv'))
                # DataFrame.append was removed in pandas 2.0; pd.concat is
                # the equivalent that works on old and new versions.
                data = pd.concat([data, correct_deaths], ignore_index=True)
            print(data)  # DEBUG
            data = data.fillna(0)
            AllMeasured, day1, numdays = imputation(
                data, useRefDead=UseRefDead, correctDeaths=correctDeaths)
            df = pd.read_excel(populationFile)
            # adds the supplemental information
            AllMeasured.update(addOtherData(data, df, day1, numdays))
            np.save(cacheFile, AllMeasured)

            # can be checked with
            # https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Situationsberichte/2020-04-16-de.pdf?__blob=publicationFile
        else:
            # NOTE(review): allow_pickle=True unpickles the cache file; only
            # load caches written by this function (pickle can execute
            # arbitrary code).
            AllMeasured = np.load(cacheFile, allow_pickle=True).item()

        AllMeasured['Region'] = "Germany"

    # Relative weights of the six age groups, normalized to sum to one.
    # NOTE(review): presumably population shares per age group -- confirm
    # the source of these numbers.
    AgePop = np.array(
        [(3.88 + 0.78), 6.62, 2.31 + 2.59 + 3.72 + 15.84, 23.9, 15.49, 7.88],
        stm.CalcFloatStr)
    AgePop /= np.sum(AgePop)
    # Spread each region's male/female population over the age groups.
    PopM = AgePop[np.newaxis, :] * AllMeasured['PopM'][:, np.newaxis]
    PopW = AgePop[np.newaxis, :] * AllMeasured['PopW'][:, np.newaxis]
    AllMeasured['Population'] = np.stack((PopM, PopW), -1)

    # Mobility data (per the original note only available up to 11.04.2020)
    # is currently not attached to AllMeasured.

    return AllMeasured
예제 #2
0
import fetch_data
from cleaning_data import *
from adapt_data import *
from build_model_naive_bayes import *
from svm_classifier import *

# Step 1: fetch data.
# The download call below is commented out, so only the tag extraction runs
# (presumably the raw data is already available locally -- TODO confirm).
dataFetcher = fetch_data.DataFetcher()
#dataFetcher.fetch()
dataFetcher.extractTags()

# Step 2: clean the data.
cleaning_data()

# Step 3: adapt the data to the 'question;;single_tag' layout.
# NOTE(review): layout taken from the original French note
# 'question;;un_seul_tag' -- confirm against adapt_data_to_naive_bayes.
adapt_data_to_naive_bayes()

# Step 4: build the model. `clf` and `vectorizer` are module-level objects
# that predict() below relies on.

clf, vectorizer = build_naive_bayes_model()  # naive Bayes variant (active)
#clf, vectorizer=build_svm_model() # SVM variant (disabled alternative)


def predict(q):
    """Print the prediction of the module-level classifier for question ``q``.

    ``q`` is wrapped in a one-element NumPy array, transformed with the
    module-level ``vectorizer`` and passed to ``clf.predict``. The question
    and the prediction are written to standard output; nothing is returned.
    """
    features = vectorizer.transform(np.array([q]))
    prediction = clf.predict(features)
    message = "Question : {!s}   ==Prediction==>   {!s}".format(q, prediction)
    print(message)


# some example predictions
예제 #3
0
def loadData(filename=None,
             useThuringia=True,
             pullData=False,
             lastDate=None,
             correctDeaths=False,
             UseRefDead=True,
             DeathData=None,
             usePreprocessed=False):
    """Load the measured case data and attach population information.

    The data comes from one of four sources, selected by the flags:

    * ``useThuringia=True``: the Thuringia line list (an Excel file) is read
      and binned with ``binThuringia``.
    * ``useThuringia=False, usePreprocessed=True``: the preprocessed RKI data
      is loaded through ``RKI_COVID19_Collection`` and processed with
      ``imputation``.
    * ``useThuringia=False, usePreprocessed=False, pullData=True``: the German
      data is fetched via ``fetch_data.DataFetcher().fetch_german_data()``,
      processed with ``imputation`` and cached as ``Data/AllMeasured.npy``
      below the base path.
    * otherwise: the cached ``Data/AllMeasured.npy`` is loaded.

    The base path is the current working directory, or its parent when the
    working directory itself is named ``Examples``.

    Args:
        filename: Name of the line-list Excel file inside the hard-coded
            Thuringia directory. Defaults to
            ``"COVID-19 Linelist 2020_04_22.xlsx"``. An absolute path is
            used as is (``os.path.join`` semantics). Only used when
            ``useThuringia`` is true.
        useThuringia: Select the Thuringia line list instead of the German
            data set.
        pullData: For the German data set, fetch fresh data instead of
            loading the cached file. Ignored when ``usePreprocessed`` is true.
        lastDate: Forwarded to ``binThuringia``; only used when
            ``useThuringia`` is true.
        correctDeaths: When pulling data, overwrite the death columns of the
            fetched data and append the records read from ``DeathData``.
            Also forwarded to ``imputation``.
        UseRefDead: Forwarded to ``imputation`` as ``useRefDead``.
        DeathData: Path of the CSV file with the corrected death records.
            When empty or ``None``, a default path relative to the working
            directory is used. Only used when pulling data with
            ``correctDeaths``.
        usePreprocessed: Use the preprocessed RKI data collection instead of
            fetching or loading the cache.

    Returns:
        The ``AllMeasured`` mapping with the additional entries ``'Region'``
        and ``'Population'``. ``'Population'`` stacks the age-weighted male,
        female and "unknown" populations along a new last axis.

    Errors raised by the underlying readers (missing files etc.) propagate
    unchanged.
    """
    import os

    basePath = os.getcwd()
    # When started from inside the Examples folder, step up to its parent.
    # Comparing the last path component avoids mangling directories whose
    # name merely ends in "Examples".
    if os.path.basename(basePath) == 'Examples':
        basePath = os.path.dirname(basePath)

    # Supporting information about the population, shared by all branches
    # that build AllMeasured from raw data.
    populationFile = os.path.join(basePath, "Examples", "bev_lk.xlsx")

    if useThuringia:
        if filename is None:
            filename = r"COVID-19 Linelist 2020_04_22.xlsx"
        # NOTE(review): machine-specific location of the line list.
        basePathT = r"C:\Users\pi96doc\Documents\Anträge\Aktuell\COVID_Dickmann_2020\PetraDickmann"
        Thuringia = pd.read_excel(os.path.join(basePathT, filename))
        Thuringia = stripQuotesFromAxes(Thuringia)
        AllMeasured, day1, numdays = binThuringia(Thuringia, lastDate=lastDate)
        AllMeasured['Region'] = "Thuringia"

        df = pd.read_excel(populationFile)
        # adds the supplemental information
        AllMeasured.update(addOtherData(Thuringia, df, day1, numdays))
    else:
        if usePreprocessed:  # Michael's data structure
            import sys
            mydir = os.path.dirname(os.path.realpath(__file__))
            # relative path from Examples to the RKI_COVID19 folder
            rkiDir = os.path.join(mydir, '..', '..', 'RKI_COVID19')
            # Only extend the search path once, so repeated calls do not
            # pile up duplicate entries.
            if rkiDir not in sys.path:
                sys.path.insert(1, rkiDir)
            from RKI_COVID19_Collection import RKI_COVID19_Collection
            # Per the original author's note: without arguments the
            # collection takes its data location from the environment.
            db = RKI_COVID19_Collection()

            print('loading preprocessed data ...')
            db.load_df()
            print('.done\n')

            AllMeasured, day1, numdays = imputation(
                db.pdf, useRefDead=UseRefDead, correctDeaths=correctDeaths)
            df = pd.read_excel(populationFile)
            # adds the supplemental information
            AllMeasured.update(addOtherData(db.pdf, df, day1, numdays))
        else:
            cacheFile = os.path.join(basePath, 'Data', 'AllMeasured.npy')
            if pullData:
                data = fetch_data.DataFetcher().fetch_german_data()
                if correctDeaths:
                    # Discard the fetched death information and append the
                    # corrected death records instead.
                    data['AnzahlTodesfall'] = 0
                    data['NeuerTodesfall'] = -9
                    if not DeathData:
                        DeathData = os.path.join(
                            '..', 'FromWeb', 'CoronaData',
                            'CSV-Dateien-mit-Covid-19-Infektionen-',
                            'Deaths_RKI_Format_new.csv')
                    correct_deaths = pd.read_csv(DeathData)
                    # DataFrame.append was removed in pandas 2.0; pd.concat
                    # is the equivalent that works on old and new versions.
                    data = pd.concat([data, correct_deaths],
                                     ignore_index=True)
                print(data)  # DEBUG
                data = data.fillna(0)
                AllMeasured, day1, numdays = imputation(
                    data, useRefDead=UseRefDead, correctDeaths=correctDeaths)
                df = pd.read_excel(populationFile)
                # adds the supplemental information
                AllMeasured.update(addOtherData(data, df, day1, numdays))
                np.save(cacheFile, AllMeasured)

                # can be checked with
                # https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Situationsberichte/2020-04-16-de.pdf?__blob=publicationFile
            else:
                # NOTE(review): allow_pickle=True unpickles the cache file;
                # only load caches written by this function (pickle can
                # execute arbitrary code).
                AllMeasured = np.load(cacheFile, allow_pickle=True).item()

        AllMeasured['Region'] = "Germany"

    # Relative weights of the age groups, normalized to sum to one. The last
    # entry is a small placeholder, apparently for the "unknown" age group.
    # NOTE(review): presumably population shares per age group -- confirm
    # the source of these numbers.
    AgePop = np.array(
        [(3.88 + 0.78), 6.62, 2.31 + 2.59 + 3.72 + 15.84, 23.9, 15.49, 7.88,
         0.001], stm.CalcFloatStr)
    AgePop /= np.sum(AgePop)

    # Spread each region's male/female population over the age groups.
    PopM = AgePop[np.newaxis, :] * AllMeasured['PopM'][:, np.newaxis]
    PopW = AgePop[np.newaxis, :] * AllMeasured['PopW'][:, np.newaxis]
    PopU = PopW * 0.00001  # just to have the unknown population not empty
    AllMeasured['Population'] = np.stack((PopM, PopW, PopU), -1)

    # Mobility data (per the original note only available up to 11.04.2020)
    # is currently not attached to AllMeasured.

    return AllMeasured
예제 #4
0
# This example is written for the new interface
import StateModeling as stm
import numpy as np
import matplotlib.pyplot as plt
import fetch_data
import pandas as pd
import tensorflow as tf

# Root folder used for all data paths below; hard-coded, machine-specific
# Windows path.
basePath = r"C:\Users\pi96doc\Documents\Programming\PythonScripts\StateModeling"
# Manual switch: as written (`False`) the cached .npy files are loaded.
# Changing the condition to `True` re-fetches the German data and rewrites
# the cache files instead.
if False:
    data = fetch_data.DataFetcher().fetch_german_data()
    data_np = data.to_numpy()  # not used in the visible part of the script
    df = pd.read_excel(
        basePath +
        r"\Examples\bev_lk.xlsx")  # support information about the population
    MeasDetected, MeasDead, SupportingInfo = stm.cumulate(data, df)
    # np.save appends the ".npy" suffix, matching the names loaded below.
    np.save(basePath + r'\Data\MeasDetected', MeasDetected)
    np.save(basePath + r'\Data\MeasDead', MeasDead)
    np.save(basePath + r'\Data\SupportingInfo', SupportingInfo)
else:
    MeasDetected = np.load(basePath + r'\Data\MeasDetected.npy')
    MeasDead = np.load(basePath + r'\Data\MeasDead.npy')
    # allow_pickle=True is required to load pickled object data
    # (presumably SupportingInfo holds mixed-type entries -- confirm).
    # Only load files from a trusted source.
    SupportingInfo = np.load(basePath + r'\Data\SupportingInfo.npy',
                             allow_pickle=True)
# Unpack the seven supporting entries into module-level names.
(IDs, LKs, PopM, PopW, Area, Ages, Gender) = SupportingInfo

# fit,data = stm.DataLoader().get_new_data()
# axes = data.keys()
# datp = data.pivot_table(values=['cases','deaths'], index=['id','day'], aggfunc=np.sum, fill_value=0)
# data_np = datp.to_numpy()
# NumIDs = data['id'].unique().shape