def del_zip_crypt(df, path, password):
    """Write a shareable copy of the patient table and zip it with a password.

    Steps, in order:
      1. ``deleteItems`` is applied once per (flag column, target column)
         pair listed below (presumably blanking items whose flag marks them
         as non-public -- confirm against ``deleteItems``).
      2. Every column whose label contains ``"*"`` is deleted.
      3. The frame is written to ``<output dir><file stem>_共有用.xlsx``.
      4. That workbook is compressed into a password-protected ``.zip``
         next to it.

    Args:
        df: Patient DataFrame; column labels are expected to be strings.
        path: "/"-separated path of the source workbook. The last five
            characters of its file name are dropped (presumably ".xlsx").
        password: Password handed to ``pyminizip.compress``.
    """
    # (flag column, target column) pairs, kept in the original call order.
    flagged_columns = (
        ('年代:公表の可否', '年代'),
        ('性別:公表の可否', '性別'),
        ('居住地:公表の可否', '居住地'),
        ('居住地:公表の可否', '居住地2'),
        ('職業:公表の可否', '職業分類'),
        ('職業:公表の可否', '職業2'),
        ('職業:公表の可否', '職業:備考'),
        ('発症日:公表の可否', '発症日'),
    )
    for flag_col, target_col in flagged_columns:
        df = deleteItems(df, flag_col, target_col)

    # Iterate over a snapshot of the labels because columns are removed
    # while looping.
    for c in list(df.columns):
        if "*" in c:
            del df[c]

    # save and crypt data
    dir_ = _utils.getOutputDir()
    pathOutput = dir_ + path.split("/")[-1][:-5] + "_共有用.xlsx"
    sheet = pL.patientSheetName
    # ExcelWriter only documents mode 'w' (new file) and 'a' (append); the
    # former 'wa' is not a valid value, so write a fresh workbook explicitly.
    with pd.ExcelWriter(pathOutput, engine='openpyxl', mode='w',
                        datetime_format='yyyy/mm/dd') as writer:
        df.to_excel(writer, startrow=1, sheet_name=sheet, index=False)

    pathZip = pathOutput[:-5] + ".zip"
    pyminizip.compress(pathOutput, "", pathZip, password, 2)
def diagnosticEngineering(df, pullDown, path):
    """Tabulate positive cases and write the summary workbooks.

    Builds tables by health centre, age and sex (categories come from the
    non-null values of the matching ``pullDown`` columns), by test date, and
    by test date x health centre (plain and cumulative). These are written
    to ``<output dir><file stem>_陽性者集計1.xlsx``. The second workbook,
    ``..._陽性者集計2.xlsx``, is produced by ``regDateAgeCrossTab``.

    Args:
        df: Patient DataFrame. It is copied, so the caller's frame is not
            modified by this function itself.
        pullDown: DataFrame holding the category columns ``phcCol``,
            ``ageCol`` and ``sexCol``.
        path: "/"-separated path of the source workbook. The last five
            characters of its file name are dropped (presumably ".xlsx").
    """
    phcVals = pullDown[phcCol].dropna().values
    ageVals = pullDown[ageCol].dropna().values
    sexVals = pullDown[sexCol].dropna().values

    dfM = df.copy()
    dfM["count"] = 1
    dfM = dataCleaning(dfM)

    regTable = _utils.basicTable(dfM, phcCol, phcVals)
    ageTable = _utils.basicTable(dfM, ageCol, ageVals)
    sexTable = _utils.basicTable(dfM, sexCol, sexVals)
    dateTable = dateTabling(dfM, testDateCol)
    regDateTable = regDateCrossTab(dfM, col1=testDateCol, col2=phcCol)
    regDateCumTable = regDateTable.cumsum()

    dir_ = _utils.getOutputDir()
    stem = path.split("/")[-1][:-5]
    path2Save1 = dir_ + stem + "_陽性者集計1.xlsx"
    path2Save2 = dir_ + stem + "_陽性者集計2.xlsx"

    # ExcelWriter only documents mode 'w' and 'a'; the former "wa" is not a
    # valid value, so create a fresh workbook explicitly.
    with pd.ExcelWriter(path2Save1, engine="openpyxl", mode="w") as writer:
        # The "エラー確認" (error check) sheet is currently not written.
        regTable.to_excel(writer, sheet_name="保健所")
        ageTable.to_excel(writer, sheet_name="年齢")
        sexTable.to_excel(writer, sheet_name="性別")
        dateTable.to_excel(writer, sheet_name="日付")
        regDateTable.to_excel(writer, sheet_name="日付-地域")
        regDateCumTable.to_excel(writer, sheet_name="日付-地域-累積")

    regDateAgeCrossTab(dfM, path2Save2, path, phcVals, ageVals, testDateCol, ageCol)
def diagnosticEngineering(df, path):
    """Tabulate diagnostic-test results and write the summary workbooks.

    Rows that are entirely empty are dropped, the data is passed through
    ``dataCleaning`` and the cross tabulations are written to
    ``<output dir><file stem>_診断検査1.xlsx``. The second workbook,
    ``..._診断検査2.xlsx``, is produced by ``regDateAgeCrossTab``.

    Args:
        df: Diagnostic-test DataFrame. The caller's frame is not modified by
            this function itself (``dropna`` and ``copy`` return new frames).
        path: "/"-separated path of the source workbook. The last five
            characters of its file name are dropped (presumably ".xlsx").
    """
    df = df.dropna(axis=0, how="all")

    # retrieve dataframe of diagnostic testing
    dfM = df.copy()
    dfM = dataCleaning(dfM)

    # create crosstabulation data
    totalDF = allTotal(dfM)
    regionDF = crossTab(dfM, phcCol, resultCol)
    ageDF = crossTab(dfM, ageCol, resultCol)
    sexDF = crossTab(dfM, sexCol, resultCol)
    dateDF = dateCrossTab(dfM)
    dateCumDF = dateCrossTab(dfM, cum=True)
    regDateDF = regDateCrossTab(dfM)

    dir_ = _utils.getOutputDir()
    stem = path.split("/")[-1][:-5]
    path2Save1 = dir_ + stem + "_診断検査1.xlsx"
    path2Save2 = dir_ + stem + "_診断検査2.xlsx"

    # ExcelWriter only documents mode 'w' and 'a'; the former "wa" is not a
    # valid value, so create a fresh workbook explicitly.
    with pd.ExcelWriter(path2Save1, engine="openpyxl", mode="w") as writer:
        # The "エラー確認" (error check) sheet is currently not written.
        totalDF.to_excel(writer, sheet_name="合計")
        regionDF.to_excel(writer, sheet_name="地域")
        ageDF.to_excel(writer, sheet_name="年齢")
        sexDF.to_excel(writer, sheet_name="性別")
        dateDF.to_excel(writer, sheet_name="日付単集計")
        dateCumDF.to_excel(writer, sheet_name="日付累積")
        regDateDF.to_excel(writer, sheet_name="日付地域")

    regDateAgeCrossTab(dfM, path2Save2)
def preFinishProcess():
    """Rename the output directory so its suffix becomes a timestamp.

    The directory path comes from ``_utils.getOutputDir(create=False)``.
    If it exists, everything from its last ``"_"`` onwards is replaced by
    ``_YYYYmmdd_HHMMSS`` (current local time) and the directory is renamed.
    Nothing happens when the directory does not exist.
    """
    out_dir = _utils.getOutputDir(create=False)
    if not os.path.exists(out_dir):
        return

    # Keep the part before the last underscore and append the timestamp.
    cut = out_dir.rfind("_")
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    os.rename(out_dir, f"{out_dir[:cut]}_{stamp}")
def tableProcessing(df1, path):
    """Build the per-date summary table and write ``<stem>_table7.xlsx``.

    Note that ``df1`` is modified in place: the columns in ``colList`` are
    converted to plain dates where possible, and the columns ``死亡日`` and
    ``count`` are added.

    Args:
        df1: Patient DataFrame.
        path: "/"-separated path of the source workbook. The last five
            characters of its file name are dropped (presumably ".xlsx").
    """
    # datetime conversion (best effort)
    for c in colList:
        try:
            df1[c] = df1[c].dt.date
        except (AttributeError, KeyError):
            # Column is missing or not datetime-like: leave it untouched.
            # Narrowed from a bare ``except`` so unrelated failures and
            # KeyboardInterrupt are no longer swallowed.
            continue

    # get death date: the discharge date of rows whose outcome is "02_死亡"
    deathCol = "死亡日"
    status = "転帰"
    cond = df1[status] == "02_死亡"
    df1[deathCol] = np.nan
    df1.loc[cond, deathCol] = df1.loc[cond, outCol]

    # tabling
    df1['count'] = 1
    posi = df1[posCol].value_counts()
    posi = posi.to_frame()
    table = posi
    for col in (onsetCol, inHospCol, negSt, negConf, outCol, deathCol):
        table = tablingMerge(table, df1, col)
    table = _utils.imputateDate(table)
    table = table.replace(np.nan, 0)

    # Cumulative totals; the order fixes the column order of the output.
    cumulative_columns = (
        ('累積陽性者数', '陽性確定日'),
        ('累積陰性者数', '陰性結果確認日'),
        ('累積死亡者数', '死亡日'),
        ('累積退院者数', '退院日'),
        ('累積入院者数', '入院日'),
        ('累積発症者数', '発症日'),
    )
    for cum_name, source_name in cumulative_columns:
        table[cum_name] = table[source_name].cumsum()

    table = table.rename(
        columns={
            '陽性確定日': '陽性者数',
            '発症日': '発症者数',
            '入院日': '入院者数',
            '陰性結果開始日': '陰性検査開始数',
            '陰性結果確認日': '陰性者数',
            '退院日': '退院者数',
            '死亡日': '死亡者数',
        })
    table['現在患者数'] = table['累積入院者数'] - table['累積退院者数']

    dir_ = _utils.getOutputDir()
    pathOutput = dir_ + path.split("/")[-1][:-5] + "_table7.xlsx"
    with pd.ExcelWriter(pathOutput, engine='openpyxl', mode='w') as writer:
        table.to_excel(writer, sheet_name="日付別集計")
def crossTabulation(df, path):
    """Write four stratified cross tabulations to ``<stem>_層別集計.xlsx``.

    Each table is built with ``_utils.basicTable`` and written to its own
    sheet of one workbook in the output directory.

    Args:
        df: Patient DataFrame.
        path: "/"-separated path of the source workbook. The last five
            characters of its file name are dropped (presumably ".xlsx").
    """
    # (sheet name, table) pairs, built in the order they are written.
    sheets = [
        ("保健所×発生状況",
         _utils.basicTable(df, '保健所', '発生状況(公表)')),
        ("年代×保健所性別",
         _utils.basicTable(df, '年代', ['保健所', '性別'])),
        ("入院医療機関×病床退院",
         _utils.basicTable(df, '入院医療機関(現在)', ['入院病床(現在)', '退院の有無'])),
        ("年代×性別現在の状況",
         _utils.basicTable(df, '年代', ['性別', '身体状況(現在の症状)'])),
    ]

    out_dir = _utils.getOutputDir()
    target = out_dir + path.split("/")[-1][:-5] + "_層別集計.xlsx"
    with pd.ExcelWriter(target, engine='openpyxl', mode='w') as writer:
        for sheet_label, table in sheets:
            table.to_excel(writer, sheet_name=sheet_label)
def main(dir_):
    """Extract survey data from every ``.xlsx`` in ``dir_`` into one workbook.

    If the folder contains a file whose path includes ``~$`` (the marker of
    a temporary file left while a workbook is open), the offending names are
    printed and the process exits via ``sys.exit()``.

    Otherwise two sheets are read from each workbook, accumulated into the
    medical-history and contact-person frames, and written together with the
    sorted list of used file names to
    ``<output dir>積極的疫学調査調査票抽出データ.xlsx``.

    Args:
        dir_: Folder that holds the survey workbooks.
    """
    # Local import: this block cannot see whether the module imports ``os``.
    import os

    allFilePaths = glob.glob(dir_ + "/*.xlsx")
    extractFilePaths = [p for p in allFilePaths if '~$' not in p]
    # basename() instead of stripping ``dir_ + "/"``: the prefix removal
    # failed when glob returned another separator or dir_ ended in a slash.
    openFileNames = [os.path.basename(p) for p in allFilePaths if '~$' in p]
    if openFileNames:
        print("フォルダ内に一時ファイルが存在します。ファイルが開いたままである可能性があります。確認して下さい。")
        for i in openFileNames:
            print("対象ファイル名:" + i)
        sys.exit()

    df1 = pd.DataFrame()
    df2 = pd.DataFrame()
    for path in extractFilePaths:
        # NOTE(review): newer pandas releases may reject the ``encoding``
        # keyword of read_excel -- confirm against the pinned pandas version.
        dfPre1 = pd.read_excel(path, sheet_name=sheet_1_name, encoding="cp932", header=None)
        dfPre4 = pd.read_excel(path, sheet_name=sheet_4_name, encoding="cp932", header=None)
        dfPre1 = insertOneRowCol(dfPre1)
        dfPre4 = insertOneRowCol(dfPre4)
        df1 = createMedicalHistoryDF(dfPre1, df1)
        df2 = createContactPersonsDF(dfPre1, dfPre4, df2)

    extractFileNames = sorted(os.path.basename(p) for p in extractFilePaths)
    df3 = pd.DataFrame({"使用したファイル": extractFileNames})

    dir_ = _utils.getOutputDir()
    pathOutput = dir_ + "積極的疫学調査調査票抽出データ.xlsx"
    with pd.ExcelWriter(pathOutput, engine="openpyxl", mode="w",
                        datetime_format='yyyy/mm/dd') as writer:
        df1.to_excel(writer, sheet_name="既往歴", index=False)
        df2.to_excel(writer, sheet_name="接触者リスト", index=False)
        df3.to_excel(writer, sheet_name="使用したファイル", index=False)
    print("積極的疫学調査調査票抽出データ.xlsxが作成されました。")
def runItemAll(self):
    """Run the whole processing pipeline on the selected files.

    The input files are read through ``self.readFiles()`` and an error-check
    file is created for each of ``fileName1`` to ``fileName3``. If that step
    raises, the traceback is reported through ``self.runErrorCheck`` and the
    method returns early.

    Otherwise the processing modules are imported and run one after another
    on the ``*_rep.xlsx`` workbook produced by
    ``merge_test_result.mergeTest``.

    Returns:
        An empty tuple when reading the files failed, otherwise ``None``.
    """
    # read files
    try:
        df, pullDown, test, hospital = self.readFiles()
        _utils.createErrorCheckFile(self.fileName1, program="全実行")
        _utils.createErrorCheckFile(self.fileName2, program="全実行")
        _utils.createErrorCheckFile(self.fileName3, program="全実行")
    except Exception:
        # Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are not reported as a file-reading failure.
        import traceback
        errorMsg = "予期せぬエラーが発生しました\n"
        errorMsg += traceback.format_exc()
        self.runErrorCheck(errorMsg)
        return ()

    # run program one by one.
    # Modules are imported right before use, as in the original sequence.
    # The test_totaling step is currently disabled.
    import merge_test_result
    merge_test_result.mergeTest(df, test, self.fileName1)

    dir_ = _utils.getOutputDir()
    pathNew = dir_ + self.fileName1.split("/")[-1][:-5] + "_rep.xlsx"
    # NOTE(review): newer pandas releases may reject the ``encoding``
    # keyword of read_excel -- confirm against the pinned pandas version.
    dfNew = pd.read_excel(pathNew, sheet_name=pL.patientSheetName, encoding="cp932", header=1)

    import patient_basic_info
    patient_basic_info.diagnosticEngineering(dfNew, pullDown, pathNew)

    import in_hosp_status
    in_hosp_status.mergeProcessFiles(dfNew, hospital)
    in_hosp_status.formatExcelStyle()

    import simple_in_hosp_status
    simple_in_hosp_status.conversion(dfNew, pathNew)
    simple_in_hosp_status.formatExcelStyle(pathNew)

    import patient_info_totaling
    patient_info_totaling.tableProcessing(dfNew, pathNew)

    import patient_daily_totaling
    patient_daily_totaling.tableProcessing(dfNew, pathNew)

    import output_A4_format
    output_A4_format.dataProcessing(dfNew, pathNew)
def dataProcessing(df, path):
    """Produce the A4-format listing ``<stem>_A4format.xlsx``.

    The caller's frame is not modified: the first ``replace`` returns a new
    DataFrame and all later edits act on that copy.

    Args:
        df: Patient DataFrame.
        path: "/"-separated path of the source workbook. The last five
            characters of its file name are dropped (presumably ".xlsx").
    """
    # Remove a non-breaking space / newline that follows the captured text.
    df = df.replace({"(.*)\xa0": r"\1"}, regex=True)
    df = df.replace({"(.*)\n": r"\1"}, regex=True)

    inDate = "入院日"
    outDate = "退院日"
    df[inDate] = _utils.convertDatetime(df, inDate)
    df[outDate] = _utils.convertDatetime(df, outDate)
    df[inDate] = pd.to_datetime(df[inDate].astype(str), format='%Y-%m-%d')
    df[outDate] = pd.to_datetime(df[outDate].astype(str), format='%Y-%m-%d')

    # Merge the occupation remark into the second occupation column.
    job2 = '職業2'
    jobCom = "職業:備考"
    df[job2] = df[job2].replace(np.nan, "")
    df[jobCom] = df[jobCom].replace(np.nan, "")
    df[job2] = df[job2] + "," + df[jobCom]
    # Series.replace matches whole values: only cells that are exactly ","
    # (both parts empty) are cleared.
    # NOTE(review): a leading or trailing comma is kept -- confirm intended.
    df[job2] = df[job2].replace(',', '')

    df1 = df[pL.A4OutputCols].copy()

    # Keep only the text after the last "_" in these coded columns.
    underScoreList = ['保健所', '性別', '国籍', '職業分類',
                      '濃厚接触者の観察状況(公表)',
                      '入院医療機関(現在)', '身体状況(現在の症状)',
                      '入院病床(現在)', '治療機器']
    for c in underScoreList:
        df1[c] = df1[c].replace({".+_(.+)": r"\1"}, regex=True)

    df1 = df1.rename(columns=pL.repOutput)

    # Convert every name in one column assignment. The previous per-row
    # ``.loc`` writes were slow and overwrote all rows sharing an index label.
    df1['氏名'] = [name(str(a)) for a in df1['氏名'].values]

    dir_ = _utils.getOutputDir()
    pathOutput = dir_ + path.split("/")[-1][:-5] + "_A4format.xlsx"
    # ExcelWriter only documents mode 'w' and 'a'; the former 'wa' is not a
    # valid value, so create a fresh workbook explicitly.
    with pd.ExcelWriter(pathOutput, engine='openpyxl', mode='w',
                        datetime_format='mm/dd') as writer:
        df1.to_excel(writer, sheet_name='最新', index=False)

    excel_formatting(pathOutput)
def tableProcessing(df, path):
    """Tabulate occurrence status and write ``<stem>_発生状況.xlsx``.

    Rows that are entirely empty are dropped, missing values in the grouping
    columns are replaced by the label ``空白`` and three tables (publication
    status, physical status, hospitalisation) are written to one workbook.

    Args:
        df: Patient DataFrame. ``dropna`` returns a new frame, so the edits
            below are not made on the object the caller passed in.
        path: "/"-separated path of the source workbook. The last five
            characters of its file name are dropped (presumably ".xlsx").
    """
    df = df.dropna(axis=0, how="all")
    for c in [phc, statusPub, outOrNot, physicalStatus]:
        df[c] = df[c].replace(np.nan, '空白')
    df["count"] = 1

    tablePub = getStatusTable(df, statusPub)
    tablePhysical = _utils.basicTable(df, phc, physicalStatus)
    tableHosp = getHospitalTable(df)

    dir_ = _utils.getOutputDir()
    pathOutput = dir_ + path.split("/")[-1][:-5] + "_発生状況.xlsx"
    # ExcelWriter only documents mode 'w' and 'a'; the former "wa" is not a
    # valid value, so create a fresh workbook explicitly.
    with pd.ExcelWriter(pathOutput, engine="openpyxl", mode="w") as writer:
        tablePub.to_excel(writer, sheet_name="公表")
        tablePhysical.to_excel(writer, sheet_name="身体状況")
        tableHosp.to_excel(writer, sheet_name="入院状況")
def mergeTest(df, test, path1):
    """Copy the patient workbook to ``<stem>_rep.xlsx`` with updated values.

    The workbook at ``path1`` is loaded with openpyxl, the column headed
    ``陽性確定日`` on the patient sheet is updated through
    ``replaceRowValues`` using the result of ``createPosDataFrame``, and the
    workbook is saved under the new name in the output directory.

    Args:
        df: Patient DataFrame.
        test: Test-result DataFrame.
        path1: "/"-separated path of the patient workbook. The last five
            characters of its file name are dropped (presumably ".xlsx").
    """
    sheet_label = pL.patientSheetName
    positives = createPosDataFrame(df, test)
    # Still computed as before, although the two steps that would write the
    # negative-test dates into the sheet are currently disabled.
    _neg_start, _neg_pass = createNegSeries(df, test)

    # replace values
    out_dir = _utils.getOutputDir()
    target = out_dir + path1.split("/")[-1][:-5] + "_rep.xlsx"
    workbook = load_workbook(filename=path1)
    sheet = workbook[sheet_label]
    positive_col = _utils.getColIndex(sheet, "陽性確定日")
    replaceRowValues(sheet, series_=positives, col=positive_col)
    workbook.save(target)
def runProgram(self):
    """Run the program selected in the combo box and report the outcome.

    The status label is set to "実行中" first. When ``self.fileFlag`` is
    truthy, the selected entry's ``"run"`` callable (falling back to
    ``self.runErorr``) is invoked and an error-check file is created for
    every non-empty file name. Any failure is written as a traceback to
    ``errorRecord.txt`` in the output directory. When ``self.fileFlag`` is
    falsy, a message asking for an .xlsx file is queued instead.

    Finally ``self.runErrorCheck`` is called with the collected message.
    """
    self.statusLbl.setText("実行中")
    self.statusLbl.repaint()
    # qWait takes integer milliseconds. The original float 0.5 could only
    # ever truncate to 0, and recent Python/PyQt versions reject a float
    # here, so pass 0 explicitly.
    QTest.qWait(0)
    self.errorMsg = ""

    # Dispatch according to the selected program.
    if self.fileFlag:
        try:
            selected = self.combo.currentText()
            self.Items[selected].get("run", self.runErorr)()
            for f in [
                    self.fileName, self.fileName1, self.fileName2,
                    self.fileName3
            ]:
                if f:
                    _utils.createErrorCheckFile(
                        f, program=self.combo.currentText())
        except BaseException:
            # Deliberately as broad as the original bare ``except``.
            # NOTE(review): dispatched programs may call sys.exit(), which
            # would otherwise propagate out of this handler -- confirm
            # before narrowing.
            import traceback
            dir_ = _utils.getOutputDir()
            fileErrorRecord = "errorRecord.txt"
            pathErorrRecord = dir_ + fileErrorRecord
            self.errorMsg += "予期せぬエラーが発生しました。\n"
            self.errorMsg += f"{fileErrorRecord} にエラーを出力してます。"
            with open(pathErorrRecord, "w") as record:
                record.write(traceback.format_exc())
    else:
        self.errorMsg += ".xlsxのファイルを選択してから実行してください。"

    self.runErrorCheck(self.errorMsg)
    self.statusLbl.repaint()
def daily_tabling(df, path):
    """Write the per-date totals to ``<stem>_日付集計.xlsx``.

    A ``count`` column is added to the caller's frame first (in place).
    The tables are built with ``DailyTotal`` / ``DailyTotal2`` and each is
    written to its own sheet.

    Args:
        df: Patient DataFrame; receives a ``count`` column.
        path: "/"-separated path of the source workbook. The last five
            characters of its file name are dropped (presumably ".xlsx").
    """
    df["count"] = 1
    # Remove a non-breaking space / newline that follows a character.
    for pattern in ("(.)\xa0", "(.)\n"):
        df = df.replace({pattern: r"\1"}, regex=True)

    # (sheet name, table) pairs, built in the order they are written.
    sheets = [
        ("陽性者数", DailyTotal(df, '確定日(公表)', '陽性者数', '累積陽性者数')),
        ("陰性結果開始数", DailyTotal(df, '陰性結果開始日', '陰性結果開始数', '累積陰性結果開始数')),
        ("陰性者数", DailyTotal(df, '陰性結果確認日', '陰性者数', '累積陰性者数')),
        ("入院者数", DailyTotal(df, '入院日', '入院者数', '累積入院者数')),
        ("退院者数", DailyTotal(df, '退院日', '退院者数', '累積退院者数')),
        ("身体状況別", DailyTotal2(df, '退院日', '身体状況(現在の症状)')),
        ("発生状況別", DailyTotal2(df, '確定日(公表)', '発生状況(公表)')),
    ]

    out_dir = _utils.getOutputDir()
    target = out_dir + path.split("/")[-1][:-5] + "_日付集計.xlsx"
    with pd.ExcelWriter(target, engine='openpyxl', mode='w') as writer:
        for sheet_label, table in sheets:
            table.to_excel(writer, sheet_name=sheet_label)
# Module-level setup: project imports, locale, output file names and the
# openpyxl names/styles used by formatExcelStyle().
# NOTE(review): ``os`` and ``datetime`` are used below but not imported in
# this excerpt -- presumably imported earlier in the file; confirm.
import _utils
import pathList as pL
import warnings

# Ignore all warnings from here on.
warnings.simplefilter('ignore')

if os.name == "nt":
    # Windows only: switch LC_CTYPE to the Japanese locale (code page 932).
    import locale
    locale.setlocale(locale.LC_CTYPE, "Japanese_Japan.932")

program = "in_hosp_status.py"
# Two-digit-year date stamp (yymmdd) used in the output file names.
today = datetime.datetime.now().strftime('%y%m%d')
# Evaluated at import time, so importing this module calls getOutputDir().
dir_ = _utils.getOutputDir()
output1 = dir_ + f"医療圏-機関-患者-対応表-公開用_{today}.xlsx"
output2 = dir_ + f"医療圏-機関-患者-対応表_{today}.xlsx"

# for global variables of formatExcelStyle()
from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter
from openpyxl.styles import Font, Color, colors, Border, Side, Alignment, PatternFill
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.formatting import Rule

# set colors
# Solid fill with the same colour (A9A9A9) as foreground and background.
darkGreyFill = PatternFill("solid", bgColor="A9A9A9", fgColor="A9A9A9")
# Wrapped text, centred horizontally and aligned to the top.
baseAlign = Alignment(wrapText=True, horizontal="center", vertical="top")