def loadData(filename=None, useThuringia=True, pullData=False, lastDate=None, correctDeaths=False, UseRefDead=True):
    """Load the measured case data and attach population information.

    Args:
        filename: Name of the Thuringia line-list workbook, resolved against
            the hard-coded Thuringia data directory (an absolute path is used
            as is). Defaults to "COVID-19 Linelist 2020_04_22.xlsx". Only
            used when ``useThuringia`` is true.
        useThuringia: If true, read and bin the Thuringia line list;
            otherwise work with the German data set.
        pullData: German data only. If true, fetch the data through
            ``fetch_data``, run ``imputation`` and cache the result as
            ``Data/AllMeasured.npy``; if false, load that cached file.
        lastDate: Forwarded to ``binThuringia``.
        correctDeaths: With ``pullData``: overwrite the death columns of the
            fetched data and append the rows of the corrected-deaths CSV.
            Also forwarded to ``imputation``.
        UseRefDead: Forwarded to ``imputation`` as ``useRefDead``.

    Returns:
        The ``AllMeasured`` mapping with the additional entries 'Region' and
        'Population'. 'Population' stacks the 'PopM' and 'PopW' entries, each
        distributed over six age groups with fixed fractions, along a new
        last axis.
    """
    import os

    basePath = os.getcwd()
    # When started from inside the Examples folder, step up to its parent.
    if os.path.basename(basePath) == 'Examples':
        basePath = os.path.dirname(basePath)
    # Support information about the population.
    populationFile = os.path.join(basePath, "Examples", "bev_lk.xlsx")

    if useThuringia:
        if filename is None:
            filename = r"COVID-19 Linelist 2020_04_22.xlsx"
        basePathT = r"C:\Users\pi96doc\Documents\Anträge\Aktuell\COVID_Dickmann_2020\PetraDickmann"
        Thuringia = pd.read_excel(os.path.join(basePathT, filename))
        Thuringia = stripQuotesFromAxes(Thuringia)
        AllMeasured, day1, numdays = binThuringia(Thuringia, lastDate=lastDate)
        AllMeasured['Region'] = "Thuringia"
        df = pd.read_excel(populationFile)
        # Adds the supplemental information.
        AllMeasured.update(addOtherData(Thuringia, df, day1, numdays))
    else:
        cacheDir = os.path.join(basePath, 'Data')
        if pullData:
            data = fetch_data.DataFetcher().fetch_german_data()
            if correctDeaths:
                # Overwrite the fetched death columns, then append the rows
                # from the corrected-deaths file.
                data['AnzahlTodesfall'] = 0
                data['NeuerTodesfall'] = -9
                correct_deaths = pd.read_csv(
                    os.path.join('~', 'Dokumente', 'RKI-Daten', 'Deaths_RKI_Format_new.csv'))
                # DataFrame.append no longer exists in pandas >= 2.0.
                data = pd.concat([data, correct_deaths], ignore_index=True)
                print(data)  # DEBUG
                # NOTE(review): assumed to belong to the correctDeaths branch
                # (fills the gaps created by appending rows) - confirm.
                data = data.fillna(0)
            AllMeasured, day1, numdays = imputation(
                data, useRefDead=UseRefDead, correctDeaths=correctDeaths)
            df = pd.read_excel(populationFile)
            # Adds the supplemental information.
            AllMeasured.update(addOtherData(data, df, day1, numdays))
            # np.save appends the ".npy" extension itself.
            np.save(os.path.join(cacheDir, 'AllMeasured'), AllMeasured)
            # can be checked with
            # https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Situationsberichte/2020-04-16-de.pdf?__blob=publicationFile
        else:
            # NOTE: allow_pickle=True unpickles the file content; only load
            # cache files that were written by this function.
            AllMeasured = np.load(os.path.join(cacheDir, 'AllMeasured.npy'),
                                  allow_pickle=True).item()
        AllMeasured['Region'] = "Germany"

    # Fixed age-group weights, normalised to sum to one.
    AgePop = np.array(
        [(3.88 + 0.78), 6.62, 2.31 + 2.59 + 3.72 + 15.84, 23.9, 15.49, 7.88],
        stm.CalcFloatStr)
    AgePop /= np.sum(AgePop)
    # Outer product: one row per 'PopM'/'PopW' entry, one column per age group
    # (assumes both entries are 1-D arrays - TODO confirm).
    PopM = AgePop[np.newaxis, :] * AllMeasured['PopM'][:, np.newaxis]
    PopW = AgePop[np.newaxis, :] * AllMeasured['PopW'][:, np.newaxis]
    AllMeasured['Population'] = np.stack((PopM, PopW), -1)
    return AllMeasured
import fetch_data
from cleaning_data import *
from adapt_data import *
from build_model_naive_bayes import *
from svm_classifier import *

# Step 1: fetch the data (only the tag extraction is active).
dataFetcher = fetch_data.DataFetcher()
#dataFetcher.fetch()
dataFetcher.extractTags()

# Step 2: clean the data.
cleaning_data()

# Step 3: adapt the data to the 'question;;un_seul_tag' (question;;single tag) layout.
adapt_data_to_naive_bayes()

# Step 4: build the model (naive Bayes is active, the SVM variant is disabled).
clf, vectorizer = build_naive_bayes_model()
#clf, vectorizer=build_svm_model() #svm


def predict(q):
    """Print the prediction of the module-level classifier for question q.

    The question is wrapped in a one-element array, transformed with the
    module-level ``vectorizer`` and passed to ``clf.predict``; the result is
    printed, nothing is returned.
    """
    features = vectorizer.transform(np.array([q]))
    prediction = clf.predict(features)
    print("Question : " + str(q) + " ==Prediction==> " + str(prediction))


# Example predictions
def loadData(filename=None, useThuringia=True, pullData=False, lastDate=None, correctDeaths=False, UseRefDead=True,
             DeathData=None, usePreprocessed=False):
    """Load the measured case data and attach population information.

    Args:
        filename: Name of the Thuringia line-list workbook, resolved against
            the hard-coded Thuringia data directory (an absolute path is used
            as is). Defaults to "COVID-19 Linelist 2020_04_22.xlsx". Only
            used when ``useThuringia`` is true.
        useThuringia: If true, read and bin the Thuringia line list;
            otherwise work with the German data set.
        pullData: German, non-preprocessed data only. If true, fetch the data
            through ``fetch_data``, run ``imputation`` and cache the result as
            ``Data/AllMeasured.npy``; if false, load that cached file.
        lastDate: Forwarded to ``binThuringia``.
        correctDeaths: With ``pullData``: overwrite the death columns of the
            fetched data and append the rows of the ``DeathData`` CSV.
            Forwarded to ``imputation`` in the fetched and in the
            preprocessed path.
        UseRefDead: Forwarded to ``imputation`` as ``useRefDead``.
        DeathData: Path of the corrected-deaths CSV; when empty or None a
            relative default below ``../FromWeb/CoronaData`` is used.
        usePreprocessed: German data only. If true, load the data through
            ``RKI_COVID19_Collection`` instead of fetching or using the cache.

    Returns:
        The ``AllMeasured`` mapping with the additional entries 'Region' and
        'Population'. 'Population' stacks three arrays along a new last axis:
        the 'PopM' and 'PopW' entries distributed over seven age groups with
        fixed fractions, and a tiny placeholder derived from the 'PopW' one.
    """
    import os

    basePath = os.getcwd()
    # When started from inside the Examples folder, step up to its parent.
    if os.path.basename(basePath) == 'Examples':
        basePath = os.path.dirname(basePath)
    # Support information about the population.
    populationFile = os.path.join(basePath, "Examples", "bev_lk.xlsx")

    if useThuringia:
        if filename is None:
            filename = r"COVID-19 Linelist 2020_04_22.xlsx"
        basePathT = r"C:\Users\pi96doc\Documents\Anträge\Aktuell\COVID_Dickmann_2020\PetraDickmann"
        Thuringia = pd.read_excel(os.path.join(basePathT, filename))
        Thuringia = stripQuotesFromAxes(Thuringia)
        AllMeasured, day1, numdays = binThuringia(Thuringia, lastDate=lastDate)
        AllMeasured['Region'] = "Thuringia"
        df = pd.read_excel(populationFile)
        # Adds the supplemental information.
        AllMeasured.update(addOtherData(Thuringia, df, day1, numdays))
    else:
        if usePreprocessed:
            # Preprocessed data structure provided by the RKI_COVID19 project.
            import sys

            mydir = os.path.dirname(os.path.realpath(__file__))
            # Relative path from this file's folder to the RKI_COVID19 folder.
            rkiDir = os.path.join(mydir, '..', '..', 'RKI_COVID19')
            if rkiDir not in sys.path:  # avoid duplicate entries on repeated calls
                sys.path.insert(1, rkiDir)
            from RKI_COVID19_Collection import RKI_COVID19_Collection

            # Called without arguments; per the original note the environment
            # is used in that case.
            db = RKI_COVID19_Collection()
            print('loading preprocessed data ...')
            db.load_df()
            print('.done\n')
            AllMeasured, day1, numdays = imputation(
                db.pdf, useRefDead=UseRefDead, correctDeaths=correctDeaths)
            df = pd.read_excel(populationFile)
            # Adds the supplemental information.
            AllMeasured.update(addOtherData(db.pdf, df, day1, numdays))
        else:
            cacheDir = os.path.join(basePath, 'Data')
            if pullData:
                data = fetch_data.DataFetcher().fetch_german_data()
                if correctDeaths:
                    # Overwrite the fetched death columns, then append the
                    # rows from the corrected-deaths file.
                    data['AnzahlTodesfall'] = 0
                    data['NeuerTodesfall'] = -9
                    if not DeathData:
                        DeathData = os.path.join(
                            '..', 'FromWeb', 'CoronaData',
                            'CSV-Dateien-mit-Covid-19-Infektionen-',
                            'Deaths_RKI_Format_new.csv')
                    correct_deaths = pd.read_csv(DeathData)
                    # DataFrame.append no longer exists in pandas >= 2.0.
                    data = pd.concat([data, correct_deaths], ignore_index=True)
                    print(data)  # DEBUG
                    # NOTE(review): assumed to belong to the correctDeaths
                    # branch (fills the gaps created by appending rows) -
                    # confirm.
                    data = data.fillna(0)
                AllMeasured, day1, numdays = imputation(
                    data, useRefDead=UseRefDead, correctDeaths=correctDeaths)
                df = pd.read_excel(populationFile)
                # Adds the supplemental information.
                AllMeasured.update(addOtherData(data, df, day1, numdays))
                # np.save appends the ".npy" extension itself.
                np.save(os.path.join(cacheDir, 'AllMeasured'), AllMeasured)
                # can be checked with
                # https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Situationsberichte/2020-04-16-de.pdf?__blob=publicationFile
            else:
                # NOTE: allow_pickle=True unpickles the file content; only
                # load cache files that were written by this function.
                AllMeasured = np.load(os.path.join(cacheDir, 'AllMeasured.npy'),
                                      allow_pickle=True).item()
        AllMeasured['Region'] = "Germany"

    # Fixed age-group weights, normalised to sum to one. The tiny last weight
    # is, per the original note, presumably a slot for "unknown" age.
    AgePop = np.array(
        [(3.88 + 0.78), 6.62, 2.31 + 2.59 + 3.72 + 15.84, 23.9, 15.49, 7.88, 0.001],
        stm.CalcFloatStr)
    AgePop /= np.sum(AgePop)
    # Outer product: one row per 'PopM'/'PopW' entry, one column per age group
    # (assumes both entries are 1-D arrays - TODO confirm).
    PopM = AgePop[np.newaxis, :] * AllMeasured['PopM'][:, np.newaxis]
    PopW = AgePop[np.newaxis, :] * AllMeasured['PopW'][:, np.newaxis]
    PopU = PopW * 0.00001  # just to keep the "unknown" population non-empty
    AllMeasured['Population'] = np.stack((PopM, PopW, PopU), -1)
    return AllMeasured
# This example is written for the new interface import StateModeling as stm import numpy as np import matplotlib.pyplot as plt import fetch_data import pandas as pd import tensorflow as tf basePath = r"C:\Users\pi96doc\Documents\Programming\PythonScripts\StateModeling" if False: data = fetch_data.DataFetcher().fetch_german_data() data_np = data.to_numpy() df = pd.read_excel( basePath + r"\Examples\bev_lk.xlsx") # support information about the population MeasDetected, MeasDead, SupportingInfo = stm.cumulate(data, df) np.save(basePath + r'\Data\MeasDetected', MeasDetected) np.save(basePath + r'\Data\MeasDead', MeasDead) np.save(basePath + r'\Data\SupportingInfo', SupportingInfo) else: MeasDetected = np.load(basePath + r'\Data\MeasDetected.npy') MeasDead = np.load(basePath + r'\Data\MeasDead.npy') SupportingInfo = np.load(basePath + r'\Data\SupportingInfo.npy', allow_pickle=True) (IDs, LKs, PopM, PopW, Area, Ages, Gender) = SupportingInfo # fit,data = stm.DataLoader().get_new_data() # axes = data.keys() # datp = data.pivot_table(values=['cases','deaths'], index=['id','day'], aggfunc=np.sum, fill_value=0) # data_np = datp.to_numpy() # NumIDs = data['id'].unique().shape