def get_starting_date():
    q = session.query(DailyDiagnosticChangeModel)
    daily_change = q.order_by(DailyDiagnosticChangeModel.date).first()
    if daily_change is None:
        return date.today()
    else:
        return daily_change.date
def get_npa_list():
    q = session.query(LocationModel)
    locations = q.all()
    geo_locations = []
    for location in locations:
        geo_locations.append(str(location.postal_code))
    return geo_locations
def load():
    q = session.query(LocationModel)
    locations = q.all()
    all_locations = (loc.serialize() for loc in locations)
    df = pd.DataFrame(all_locations)
    return df
def test_multiple_persons_by_npa():
    persons = 100
    timestamp = int(time.time())
    npa = 3240
    for _ in range(0, persons):
        doc_id = str(uuid.uuid4())[0:10]
        print(doc_id)
        report = IndividualReportModel(
            document_id=doc_id,
            diagnostic=random.randint(0, 4),
            locator=npa,
            session_id=str(uuid.uuid4()),
            timestamp=timestamp,
            analysis_done=False
        )
        session.add(report)
        session.commit()

    analysis_next_report(persons)

    _date = datetime.fromtimestamp(timestamp)
    print(_date)
    q = session.query(DailyDiagnosticChangeModel)
    daily_change = q.filter_by(locator=npa,
                               year=_date.year,
                               month=_date.month,
                               day=_date.day).first()
    total = daily_change.diagnostic_0 + \
        daily_change.diagnostic_1 + \
        daily_change.diagnostic_2 + \
        daily_change.diagnostic_3 + \
        daily_change.diagnostic_4
    assert persons == total
def test_one_person_update():
    nb_update = 1000
    timestamps = range(1584892474, 1584892474 + nb_update)
    session_id = str(uuid.uuid4())
    npa = 1246
    for timestamp in timestamps:
        doc_id = str(uuid.uuid4())[0:10]
        print(doc_id)
        report = IndividualReportModel(
            document_id=doc_id,
            diagnostic=random.randint(0, 4),
            locator=npa,
            session_id=session_id,
            timestamp=timestamp,
            analysis_done=False
        )
        session.add(report)
        session.commit()

    analysis_next_report(nb_update)

    _date = datetime.fromtimestamp(timestamps[0])
    q = session.query(DailyDiagnosticChangeModel)
    daily_change = q.filter_by(locator=npa,
                               year=_date.year,
                               month=_date.month,
                               day=_date.day).first()
    total = daily_change.diagnostic_0 + \
        daily_change.diagnostic_1 + \
        daily_change.diagnostic_2 + \
        daily_change.diagnostic_3 + \
        daily_change.diagnostic_4
    print(total)
    assert total == 1
def report():
    data = request.json
    print(data)
    # if token submitted
    #     if token valid
    #         insert report into database
    # else
    #     generate new token
    #     insert report into database
    #     return token
    if 'token' in data.keys():
        submitted_token = str(data['token'])
        if len(submitted_token) != 7:
            raise InvalidUsage("Invalid token")
        q = session.query(Token).filter_by(token=submitted_token).first()
        if q:
            try:
                report = ReportFactory.build(data)
                try:
                    session.add(report)
                    session.commit()
                except:
                    session.rollback()
                    abort(500, "Could not insert into database")
            except TypeError:
                raise InvalidUsage(
                    "Some parameter was wrongly typed (string, int, array).")
            if report.has_comorbid:
                data['report']['comorbid']['parent_id'] = report.document_id
                try:
                    comorbid = ComorbidFactory.build(
                        data['report']['comorbid'])
                    try:
                        session.add(comorbid)
                        session.commit()
                    except:
                        session.rollback()
                        abort(500, "Could not insert into database")
                except TypeError:
                    raise InvalidUsage(
                        "Some parameter was wrongly typed (string, int, array)."
                    )
            return make_response("", 201)
        else:
            raise InvalidUsage("Provided token doesn't exist")
    elif 'report' in data.keys():
        generated_token = generate_token()
        data['token'] = generated_token
        token = TokenFactory.build(generated_token)
        try:
            session.add(token)
            session.commit()
        except:
            session.rollback()
            abort(500, "Could not insert a new token into database")
        try:
            report = ReportFactory.build(data)
            try:
                session.add(report)
                session.commit()
            except:
                session.rollback()
                abort(500, "Could not insert into database")
        except TypeError:
            raise InvalidUsage(
                "Some parameter was wrongly typed (string, int, array).")
        if report.has_comorbid:
            data['report']['comorbid']['parent_id'] = report.document_id
            try:
                comorbid = ComorbidFactory.build(data['report']['comorbid'])
                try:
                    session.add(comorbid)
                    session.commit()
                except:
                    session.rollback()
                    abort(500, "Could not insert into database")
            except TypeError:
                raise InvalidUsage(
                    "Some parameter was wrongly typed (string, int, array).")
        return make_response(jsonify({"token": generated_token}), 201)
    else:
        raise InvalidUsage("Required parameters are missing")
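# Hypothetical payload sketch for the endpoint above. Only the keys that the
# code actually reads ('token', 'report', 'comorbid', 'parent_id') come from
# the source; everything else inside 'report' and 'comorbid' is defined by
# ReportFactory / ComorbidFactory and is left elided here, not assumed:
#
# {
#     "token": "abc1234",          # optional; must be 7 characters when present
#     "report": {
#         ...,                     # report fields consumed by ReportFactory
#         "comorbid": { ... }      # only read when report.has_comorbid is true
#     }
# }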
def all():
    q = session.query(IndividualReportModel)
    individuals = q.all()
    return jsonify(
        individuals=[individual.serialize() for individual in individuals])
def all_locations():
    q = session.query(LocationModel)
    locations = q.all()
    return jsonify(locations=[loc.serialize() for loc in locations])
def analysis_next_report(collection_size: int):
    start_time_analysis = time.time()

    # load all previously analysed reports
    report_done_df = pd.read_sql(
        "SELECT * FROM individual_report WHERE analysis_done = 1",
        con=engine, index_col="document_id")

    # load the next collection of reports to analyse
    next_reports = pd.read_sql(
        "SELECT * FROM individual_report WHERE analysis_done = 0 "
        "ORDER BY timestamp LIMIT " + str(collection_size),
        con=engine, index_col="document_id")

    # timestamps are stored in milliseconds, convert to seconds
    report_done_df["timestamp"] = report_done_df["timestamp"] / 1000
    next_reports["timestamp"] = next_reports["timestamp"] / 1000

    # hold the document ids of the reports to flag as analysed at the end
    report_to_update = list()
    # hold the per-day, per-locator diagnostic deltas computed in this batch
    daily_delta_df = pd.DataFrame(
        columns=['locator', 'year', 'month', 'day', '0', '1', '2', '3', '4', '5'])

    sample = 0
    start_time = time.time()

    # iterate over all the reports to analyse
    for doc_id, next_report in next_reports.iterrows():
        sample += 1
        _date = datetime.fromtimestamp(next_report['timestamp'])  # to seconds
        delta = [0, 0, 0, 0, 0, 0]

        # search for the last status of the same session
        same_session_id_df = report_done_df.loc[
            report_done_df['session_id'] == next_report['session_id']]
        if same_session_id_df.empty:
            delta[next_report['diagnostic']] = 1
            # print('Delta (new user)', str(delta))
        else:
            last_report = same_session_id_df.sort_values(
                by=['timestamp'], ascending=False).iloc[0]
            if last_report['diagnostic'] != next_report['diagnostic']:
                delta[last_report['diagnostic']] = -1
                delta[next_report['diagnostic']] = 1
            # print('Delta (existing user)', str(delta))

        # check if a daily delta already exists for this day and locator
        previous_daily = daily_delta_df.loc[
            (daily_delta_df['year'] == _date.year) &
            (daily_delta_df['month'] == _date.month) &
            (daily_delta_df['day'] == _date.day) &
            (daily_delta_df['locator'] == next_report['locator'])]
        if previous_daily.empty:
            # then create it
            new_daily = dict()
            new_daily['locator'] = next_report['locator']
            new_daily['year'] = _date.year
            new_daily['month'] = _date.month
            new_daily['day'] = _date.day
            new_daily['0'] = delta[0]
            new_daily['1'] = delta[1]
            new_daily['2'] = delta[2]
            new_daily['3'] = delta[3]
            new_daily['4'] = delta[4]
            new_daily['5'] = delta[5]
            new_daily[str(next_report['diagnostic'])] = 1
            daily_delta_df = daily_delta_df.append(new_daily, ignore_index=True)
        else:
            # then update it
            # convert the dataframe to a series by taking the first (and only) row
            previous_daily = previous_daily.iloc[0]
            # print('new diagnostic:', next_report['diagnostic'])
            daily_delta_df.at[previous_daily.name, "0"] = previous_daily["0"] + delta[0]
            daily_delta_df.at[previous_daily.name, "1"] = previous_daily["1"] + delta[1]
            daily_delta_df.at[previous_daily.name, "2"] = previous_daily["2"] + delta[2]
            daily_delta_df.at[previous_daily.name, "3"] = previous_daily["3"] + delta[3]
            daily_delta_df.at[previous_daily.name, "4"] = previous_daily["4"] + delta[4]
            daily_delta_df.at[previous_daily.name, "5"] = previous_daily["5"] + delta[5]

        if sample % 100 == 0:
            spend_time = time.time() - start_time
            print('Analysed ' + str(sample) + ' samples in ' + str(spend_time) + 's')
            start_time = time.time()

        # update report status
        next_report['analysis_done'] = 1
        # keep track of the report to update in the database
        # report_to_update_df = report_to_update_df.append(next_report)
        report_to_update.append(doc_id)
        # update the list of analysed reports
        report_done_df = report_done_df.append(next_report)

    # print('====== Report to update ========')
    # print(report_to_update)
    print('====== Daily change ========')
    print(daily_delta_df)

    # upload daily changes
    for doc_id, daily_delta in daily_delta_df.iterrows():
        locator = daily_delta['locator']
        year = daily_delta['year']
        month = daily_delta['month']
        day = daily_delta['day']
        q = session.query(DailyDiagnosticChangeModel)
        daily_change = q.filter_by(locator=locator,
                                   year=year,
                                   month=month,
                                   day=day).first()
        if daily_change is None:
            # then create it
            daily_change = DailyDiagnosticChangeModel(
                locator=locator,
                year=year,
                month=month,
                day=day,
                diagnostic_0=daily_delta['0'],
                diagnostic_1=daily_delta['1'],
                diagnostic_2=daily_delta['2'],
                diagnostic_3=daily_delta['3'],
                diagnostic_4=daily_delta['4'],
                diagnostic_5=daily_delta['5'],
            )
            session.add(daily_change)
        else:
            # update the diagnostic counters
            daily_change.diagnostic_0 += daily_delta['0']
            daily_change.diagnostic_1 += daily_delta['1']
            daily_change.diagnostic_2 += daily_delta['2']
            daily_change.diagnostic_3 += daily_delta['3']
            daily_change.diagnostic_4 += daily_delta['4']
            daily_change.diagnostic_5 += daily_delta['5']
    session.commit()

    # build the query that flags the analysed individual reports
    if report_to_update:  # an empty IN () clause would be invalid SQL
        ids = '(' + ','.join('"' + doc_id + '"' for doc_id in report_to_update) + ')'
        query = ('UPDATE individual_report SET analysis_done=1 '
                 'WHERE individual_report.document_id IN ' + ids + ';')
        with engine.connect() as con:
            con.execute(query)

    print('Analysis of ' + str(collection_size) + ' reports in ' +
          str(time.time() - start_time_analysis) + 's')
def count_report_to_analyse():
    q = session.query(IndividualReportModel)
    return q.filter_by(analysis_done=False).count()
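# Minimal sketch of a batch-analysis loop built from the two helpers above,
# assuming they live in (or are imported into) the same module. The helper
# name and the batch size of 500 are assumptions, not values used elsewhere.
def run_full_analysis(batch_size: int = 500):
    # keep analysing until no unprocessed report remains
    while count_report_to_analyse() > 0:
        analysis_next_report(batch_size)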
if __name__ == '__main__':
    fieldnames = [
        'npa_plz',
        'town',
        'state',
        'latitude',
        'longitude'
    ]
    with open(str(OUTPUT_GEO_CODING_FILE), 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # query all locations first
        q = session.query(LocationModel)
        locations = q.all()
        for location in locations:
            writer.writerow({
                'npa_plz': location.npa,
                'town': location.town,
                'state': location.state,
                'longitude': location.longitude,
                'latitude': location.latitude
            })
def export_daily_report_to_csv():
    from_day = '2020-03-24'  # starting date in the database !!!
    to_day = '2020-03-29'
    from_date = datetime.strptime(from_day, DAY_FORMAT)
    to_date = datetime.strptime(to_day, DAY_FORMAT)

    # clean the current output
    for _f in DAILY_REPORT_DIR.glob('*.csv'):
        _f.unlink()

    # query all locations first
    q = session.query(LocationModel)
    locations = q.all()
    geo_locations = {}
    for location in locations:
        geo_locations[str(location.npa)] = {
            'longitude': location.longitude,
            'latitude': location.latitude,
            'state': location.state
        }

    fieldnames = [
        'date',
        'state',
        'npa_plz',
        'latitude',
        'longitude',
        'total_healthy',
        'total_sick_guess_no_corona',
        'total_sick_guess_corona',
        'total_sick_corona_confirmed',
        'total_recovered_confirmed',
        'total_recovered_not_confirmed',
    ]

    merge_file = OUTPUT_DATASETS_PATH / 'merge-all-days.csv'
    with open(str(merge_file), 'w', newline='') as csvfile:
        merge_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        merge_writer.writeheader()

        # iterate over days
        totals = {}
        current_day = from_date
        while current_day <= to_date:
            # create the daily file
            daily_file = DAILY_REPORT_DIR / (
                'ch-covid-19-' + current_day.strftime(DAY_FORMAT) + '.csv')
            with open(str(daily_file), 'w', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()

                q = session.query(DailyDiagnosticChangeModel)
                daily_changes = q.filter_by(year=current_day.year,
                                            month=current_day.month,
                                            day=current_day.day).all()
                print(current_day)
                if daily_changes is not None:
                    for daily_change in daily_changes:
                        try:
                            if daily_change.locator not in totals:
                                # then create it
                                totals[daily_change.locator] = {
                                    'longitude': geo_locations[daily_change.locator]['longitude'],
                                    'latitude': geo_locations[daily_change.locator]['latitude'],
                                    'state': geo_locations[daily_change.locator]['state'],
                                    'data': [
                                        daily_change.diagnostic_0,
                                        daily_change.diagnostic_1,
                                        daily_change.diagnostic_2,
                                        daily_change.diagnostic_3,
                                        daily_change.diagnostic_4,
                                        daily_change.diagnostic_5,
                                    ]
                                }
                            else:
                                # add the daily change to the previous total
                                totals[daily_change.locator]['data'] = [
                                    totals[daily_change.locator]['data'][0] + daily_change.diagnostic_0,
                                    totals[daily_change.locator]['data'][1] + daily_change.diagnostic_1,
                                    totals[daily_change.locator]['data'][2] + daily_change.diagnostic_2,
                                    totals[daily_change.locator]['data'][3] + daily_change.diagnostic_3,
                                    totals[daily_change.locator]['data'][4] + daily_change.diagnostic_4,
                                    totals[daily_change.locator]['data'][5] + daily_change.diagnostic_5,
                                ]
                            # totals cannot be negative; clamp to zero to absorb
                            # errors present in the dataset
                            for i in range(6):
                                if totals[daily_change.locator]['data'][i] < 0:
                                    totals[daily_change.locator]['data'][i] = 0
                        except KeyError:
                            # wrong npa
                            print('Wrong npa: ' + str(daily_change.locator))

                total_status = 0
                # export all totals for the current day; this carries forward the
                # totals from previous days when the current day has no daily change
                for npa, total in totals.items():
                    total_status += sum(total['data'])
                    writer.writerow({
                        'npa_plz': npa,
                        'state': total['state'],
                        'longitude': total['longitude'],
                        'latitude': total['latitude'],
                        'date': current_day.strftime(DAY_FORMAT),
                        'total_healthy': total['data'][0],
                        'total_sick_guess_no_corona': total['data'][1],
                        'total_sick_guess_corona': total['data'][2],
                        'total_sick_corona_confirmed': total['data'][3],
                        'total_recovered_not_confirmed': total['data'][4],
                        'total_recovered_confirmed': total['data'][5],
                    })
                    merge_writer.writerow({
                        'npa_plz': npa,
                        'state': total['state'],
                        'longitude': total['longitude'],
                        'latitude': total['latitude'],
                        'date': current_day.strftime(DAY_FORMAT),
                        'total_healthy': total['data'][0],
                        'total_sick_guess_no_corona': total['data'][1],
                        'total_sick_guess_corona': total['data'][2],
                        'total_sick_corona_confirmed': total['data'][3],
                        'total_recovered_not_confirmed': total['data'][4],
                        'total_recovered_confirmed': total['data'][5],
                    })
                print(total_status)
            current_day = current_day + timedelta(days=1)
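# Assuming DAY_FORMAT is the ISO day format matching the date literals above
# (i.e. '%Y-%m-%d'), the export can be run as a script in the same way as the
# geocoding export, e.g.:
#
# if __name__ == '__main__':
#     export_daily_report_to_csv()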