def create_submission(model, datasets, raw_datasets):
    """Build a single submission table covering every dataset.

    ``datasets`` and ``raw_datasets`` are walked in lockstep. For each pair
    the model's predictions (``get_predictions``) are converted with
    ``predictions_to_submission`` and the resulting parts are concatenated
    with a fresh, continuous index.
    """
    parts = [
        predictions_to_submission(get_predictions(dataset, model), raw_dataset)
        for dataset, raw_dataset in zip(datasets, raw_datasets)
    ]
    return pd.concat(parts, ignore_index=True)
def test(mhc, data, model, model_path):
    '''
    Testing protocol

    Loads the CSV at `data`, keeps the rows for allele `mhc`, restores the
    weights stored at `model_path` into the architecture named by `model`
    and prints AUC, F1 and Kendall tau of the predictions.

    mhc: allele name passed to Dataset.get_allele
    data: path of the CSV file (columns 'mhc', 'peptide', 'IC50(nM)')
    model: one of 'fc', 'gru', 'lstm', 'chunky_cnn', 'spanny_cnn'
    model_path: path of the weights file to load

    Raises ValueError when `model` is not a supported architecture name.
    '''

    # Fail fast on an unknown architecture: previously the name fell through
    # the elif chain and crashed later with an AttributeError on a str.
    supported_models = ('fc', 'gru', 'lstm', 'chunky_cnn', 'spanny_cnn')
    if model not in supported_models:
        raise ValueError('Unknown model %r, expected one of: %s'
                         % (model, ', '.join(supported_models)))

    # print out options
    print('Testing\nMHC: %s\nData: %s\nModel: %s\nSave path: %s' %
          (mhc, data, model, model_path))

    # load test data
    test_data = Dataset.from_csv(filename=data,
                                 sep=',',
                                 allele_column_name='mhc',
                                 peptide_column_name='peptide',
                                 affinity_column_name='IC50(nM)')

    # apply cut/pad or mask to same length
    if 'lstm' in model or 'gru' in model:
        test_data.mask_peptides()
    else:
        test_data.cut_pad_peptides()

    # get the allele specific data
    mhc_test = test_data.get_allele(mhc)

    # define model: every supported name maps to models.mhcnuggets_<name>
    model = getattr(models, 'mhcnuggets_' + model)()

    # load weights and compile model
    model.load_weights(model_path)
    model.compile(loss='mse', optimizer=Adam(lr=0.001))

    # get tensorized values for testing
    test_peptides, test_continuous, test_binary = mhc_test.tensorize_keras(
        embed_type='softhot')

    # test
    preds_continuous, preds_binary = get_predictions(test_peptides, model)
    test_auc = roc_auc_score(test_binary, preds_continuous)
    test_f1 = f1_score(test_binary, preds_binary)
    test_ktau = kendalltau(test_continuous, preds_continuous)[0]
    print('Test AUC: %.4f, F1: %.4f, KTAU: %.4f' %
          (test_auc, test_f1, test_ktau))
def main():
    """Score a real-time data file window by window against baseline models.

    Command line: ``<inputfile> [user_rating]``. ``inputfile`` is the CSV to
    analyse; ``user_rating`` is the optional past rating out of 10 and
    defaults to 5.

    Models are trained once on ``basedata/main.csv``. The input is then
    processed in consecutive windows of ``sliding_samples`` rows; each window
    updates the accident chances and the user rating, and the final values
    are printed.
    """
    # Only the input file is mandatory; the rating falls back to its default.
    if len(sys.argv) < 2:
        print("Please provide the inputfile(realtimedata)", len(sys.argv))
        sys.exit(-1)

    window = 20
    window2 = 10
    user_rating = 5
    chances = 0.2
    sliding_samples = 120
    # Windows with fewer rows than this are skipped.
    min_window_rows = 12

    inputfile = sys.argv[1]
    if len(sys.argv) >= 3:
        print("User past rating out of 10", int(sys.argv[2]))
        user_rating = int(sys.argv[2])

    baseline_dataset = load_my_data("basedata/main.csv", ",")
    baseline_dataset = preprocess_data(baseline_dataset, window, window2)
    m_mdls = mdl.train_model(baseline_dataset)
    print("DONE TRAINING MODELS WITH BASELINE DATASET")

    dataset1 = load_my_data(inputfile, ',')
    rows = dataset1.shape[0]

    # Step by a whole window: reassigning the loop variable inside a
    # ``for ... in range(...)`` body does not advance the iteration, so the
    # stride has to be part of the range itself.
    for start in range(0, rows, sliding_samples):
        # Slice ends are exclusive, so clamping to ``rows`` keeps the last row.
        end = min(start + sliding_samples, rows)
        print("PROCESSING WINDOW FROM ", start, " UNTIL ", end)
        if end - start < min_window_rows:
            continue
        dset1 = dataset1[start:end]
        dset1 = preprocess_data(dset1, window, window2)
        out_lbls, prob = mdl.get_predictions(dset1, m_mdls)
        chances, user_rating = get_accident_probability(
            user_rating, out_lbls, prob, chances)

    print("CHANCES OF ACCIDENT:", chances, " NEW USER RATING:",
          round(user_rating))
def calculate_relation(mhc, data, model, weights_dir):
    '''
    Find the allele whose saved weights best predict the data of `mhc`.

    For every other allele in `data.alleles`, the weights file
    `<weights_dir>/<allele>.h5` is loaded into `model` and the ROC AUC of its
    predictions on the `mhc` peptides is computed. Alleles whose weights
    cannot be loaded (IOError) or whose AUC cannot be computed (ValueError)
    are skipped.

    Returns (best_mhc, best_auc, num_mhc, num_tuning_mhc), where num_mhc is
    the number of peptides for `mhc` and num_tuning_mhc the number of
    peptides for the best allele. When no allele scores above 0 the result
    is ('', 0, num_mhc, 0).
    '''

    print('Calculating tuning MHC for %s' % mhc)

    # get the allele specific data
    mhc_data = data.get_allele(mhc)
    train_peptides, train_continuous, train_binary = mhc_data.tensorize_keras(
        embed_type='softhot')

    best_mhc = ''
    best_auc = 0
    num_mhc = len(mhc_data.peptides)
    # Initialised so the return below is defined even when no candidate wins;
    # previously this raised UnboundLocalError.
    num_tuning_mhc = 0

    for tuning_mhc in sorted(set(data.alleles)):

        # don't want to tune with ourselves
        if mhc == tuning_mhc:
            continue

        # load the candidate's weights; skip alleles without a readable file
        try:
            model_path = os.path.join(weights_dir, tuning_mhc + '.h5')
            model.load_weights(model_path)
        except IOError:
            continue

        preds_continuous, preds_binary = get_predictions(train_peptides, model)

        try:
            auc = roc_auc_score(train_binary, preds_continuous)
            if auc > best_auc:
                best_mhc = tuning_mhc
                best_auc = auc
                num_tuning_mhc = len(data.get_allele(tuning_mhc).peptides)
        except ValueError:
            continue

    return best_mhc, best_auc, num_mhc, num_tuning_mhc
def predict(model, weights_path, peptides_path):
    '''
    Prediction protocol

    Reads one peptide per line from `peptides_path`, restores the weights at
    `weights_path` into the architecture named by `model` and prints each
    peptide next to the value `dataset.map_proba_to_ic50` derives from its
    prediction.

    model: one of 'fc', 'gru', 'lstm', 'chunky_cnn', 'spanny_cnn'
    weights_path: path of the weights file to load
    peptides_path: path of the text file listing the peptides

    Raises ValueError when `model` is not a supported architecture name.
    '''

    # Fail fast on an unknown architecture: previously the name fell through
    # the elif chain and crashed later with an AttributeError on a str.
    supported_models = ('fc', 'gru', 'lstm', 'chunky_cnn', 'spanny_cnn')
    if model not in supported_models:
        raise ValueError('Unknown model %r, expected one of: %s'
                         % (model, ', '.join(supported_models)))

    # read peptides; the context manager closes the file handle
    with open(peptides_path) as peptides_file:
        peptides = [line.strip() for line in peptides_file]
    print('Predicting for %d peptides' % (len(peptides)))

    # apply cut/pad or mask to same length
    if 'lstm' in model or 'gru' in model:
        normed_peptides = dataset.mask_peptides(peptides)
    else:
        normed_peptides = dataset.cut_pad_peptides(peptides)

    # get tensorized values for prediction
    peptides_tensor = dataset.tensorize_keras(normed_peptides,
                                              embed_type='softhot')

    # make model: every supported name maps to models.mhcnuggets_<name>
    print('Building model')
    model = getattr(models, 'mhcnuggets_' + model)()

    model.load_weights(weights_path)
    model.compile(loss='mse', optimizer=Adam(lr=0.001))

    # run the model and report one line per peptide
    preds_continuous, preds_binary = get_predictions(peptides_tensor, model)
    ic50s = [dataset.map_proba_to_ic50(p[0]) for p in preds_continuous]
    for i, peptide in enumerate(peptides):
        print(peptide, ic50s[i])
def run_app():
    """Render the Corona Calculator page from top to bottom.

    The statements run in display order: header and disclaimer, sidebar
    selection, reported-versus-estimated cases, historical plot, infection
    forecast from the asymptomatic SIR model, hospital-bed comparison and the
    age breakdown of outcomes, followed by the sharing links and GitHub logo.
    """
    css.hide_menu()
    css.limit_plot_size()

    # Get cached country data; refetch after clearing the cache when stale
    countries = fetch_country_data()
    if countries.stale:
        st.caching.clear_cache()
        countries = fetch_country_data()

    st.markdown(
        body=generate_html(text="Corona Calculator", bold=True, tag="h1"),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            tag="h2",
            text="A tool to help you visualize the impact of social distancing <br>",
        ),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            text="<strong>Disclaimer:</strong> <em>The creators of this application are not healthcare professionals. "
            "The illustrations provided were estimated using best available data but might not accurately reflect reality.</em>",
            color="gray",
            font_size="12px",
        ),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            tag="h4",
            text=f"<u><a href=\"{NOTION_MODELLING_DOC}\" target=\"_blank\" style=color:{COLOR_MAP['pink']};>"
            "Methodology</a></u> <span>  </span>"
            f"<u><a href=\"{MEDIUM_BLOGPOST}\" target=\"_blank\" style=color:{COLOR_MAP['pink']};>"
            "Blogpost</a> </u>"
            "<hr>",
        ),
        unsafe_allow_html=True,
    )

    # Country selection and the figures that belong to it
    sidebar = Sidebar(countries)
    country = sidebar.country
    country_data = countries.country_data[country]
    _historical_df = countries.historical_country_data
    historical_data = _historical_df.loc[_historical_df.index == country]
    number_cases_confirmed = country_data["Confirmed"]
    population = country_data["Population"]
    num_hospital_beds = country_data["Num Hospital Beds"]

    st.subheader(f"How has the disease spread in {country}?")
    st.write(
        "The number of reported cases radically underestimates the true cases, because people do not show symptoms for "
        "several days, not everybody gets tested, and the tests take a few days to return results. "
        "The extent depends upon your country's testing strategy."
        " This estimate (14% reporting) is from China ([source](https://science.sciencemag.org/content/early/2020/03/13/science.abb3221))."
    )

    # Estimate true cases
    true_cases_estimator = models.TrueInfectedCasesModel(
        constants.ReportingRate.default
    )
    estimated_true_cases = true_cases_estimator.predict(number_cases_confirmed)
    reported_vs_true_cases(int(number_cases_confirmed), estimated_true_cases)

    st.markdown(
        f"Given the prevalence of the infection in your country, the probability of being infected at this time is "
        f"**{estimated_true_cases / population:.3%}**. Even if you show no symptoms, the probability of being infected is "
        f"**{models.get_probability_of_infection_give_asymptomatic(population, estimated_true_cases, constants.AsymptomaticRate.default):.3%}**. "
        f"Note that these probabilities are on a country-wide basis and so may not apply to your situation."
    )

    # Plot historical data
    fig = graphing.plot_historical_data(historical_data)
    st.write(fig)

    # Forecast with the asymptomatic SIR model
    asymptomatic_cases_estimator = models.AsymptomaticCasesModel(
        constants.AsymptomaticRate.default
    )
    contact_rate = sidebar.contact_rate
    asymptomatic_sir_model = models.AsymptomaticSIRModel(
        transmission_rate_per_contact=constants.TransmissionRatePerContact.default_per_symptom_state,
        contact_rate=contact_rate,
        asymptomatic_cases_model=asymptomatic_cases_estimator,
        recovery_rate=constants.RecoveryRate.default,
        normal_death_rate=constants.MortalityRate.default,
        critical_death_rate=constants.CriticalDeathRate.default,
        hospitalization_rate=constants.HospitalizationRate.default,
        hospital_capacity=num_hospital_beds
    )
    df = models.get_predictions(
        cases_estimator=true_cases_estimator,
        sir_model=asymptomatic_sir_model,
        num_diagnosed=number_cases_confirmed,
        num_recovered=country_data["Recovered"],
        num_deaths=country_data["Deaths"],
        area_population=population,
    )

    st.subheader("How will my actions affect the spread?")
    st.write(
        "The critical factor for controlling spread is how many others infected people interact with each day. "
        "This has a dramatic effect upon the dynamics of the disease. "
    )
    st.write(
        "**Play with the slider to the left to see how this changes the dynamics of disease spread**"
    )

    df_base = df[~df.Status.isin(["Need Hospitalization"])]
    base_graph = graphing.infection_graph(df_base, df_base.Forecast.max(), sidebar.contact_rate)
    st.warning(graph_warning)
    st.write(base_graph)

    st.subheader("How will this affect my healthcare system?")
    st.write(
        "The important variable for hospitals is the peak number of people who require hospitalization"
        " and ventilation at any one time."
    )

    # Do some rounding to avoid beds sounding too precise!
    approx_num_beds = round(num_hospital_beds / 100) * 100
    st.write(
        f"Your country has around **{approx_num_beds:,}** beds. Bear in mind that most of these "
        "are probably already in use for people sick for other reasons."
    )
    st.write(
        "It's hard to know exactly how many ventilators are present per country, but there will certainly be a worldwide "
        "shortage. Many countries are scrambling to buy them [(source)](https://www.reuters.com/article/us-health-coronavirus-draegerwerk-ventil/germany-italy-rush-to-buy-life-saving-ventilators-as-manufacturers-warn-of-shortages-idUSKBN210362)."
    )

    peak_occupancy = df.loc[df.Status == "Need Hospitalization"]["Forecast"].max()
    # Guard the ratio: when nobody is forecast to need a bed, everyone who
    # needs one has access, and dividing by a zero peak must be avoided.
    if peak_occupancy > 0:
        percent_beds_at_peak = min(100 * num_hospital_beds / peak_occupancy, 100)
    else:
        percent_beds_at_peak = 100

    num_beds_comparison_chart = graphing.num_beds_occupancy_comparison_chart(
        num_beds_available=approx_num_beds,
        max_num_beds_needed=peak_occupancy,
        contact_rate=sidebar.contact_rate
    )
    st.write(num_beds_comparison_chart)

    st.markdown(
        f"At peak, **{int(peak_occupancy):,}** people will need hospital beds. ** {percent_beds_at_peak:.1f}% ** of people "
        f"who need a bed in hospital will have access to one given your country's historical resources. This does "
        f"not take into account any special measures that may have been taken in the last few months."
    )

    st.subheader("How severe will the impact be?")
    # Final forecast values for the Dead and Recovered series
    num_dead = df[df.Status == "Dead"].Forecast.iloc[-1]
    num_recovered = df[df.Status == "Recovered"].Forecast.iloc[-1]
    st.markdown(
        f"If the average person in your country adopts the selected behavior, we estimate that **{int(num_dead):,}** "
        f"people will die."
    )
    # The chart is written right after this sentence, i.e. below it.
    st.markdown(
        "The graph below shows a breakdown of casualties and hospitalizations by age group."
    )

    outcomes_by_age_group = models.get_status_by_age_group(num_dead, num_recovered)
    fig = graphing.age_segregated_mortality(
        outcomes_by_age_group.loc[:, ["Dead", "Need Hospitalization"]],
        contact_rate=sidebar.contact_rate
    )
    st.write(fig)

    st.write(
        f"Parameters by age group, including demographic distribution, are [worldwide numbers](https://population.un.org/wpp/DataQuery/) "
        f"so they may be slightly different in your country."
    )
    st.write(
        f"We've used mortality rates from this [recent paper from Imperial College](https://www.imperial.ac.uk/media/imperial-college/medicine/sph/ide/gida-fellowships/Imperial-College-COVID19-NPI-modelling-16-03-2020.pdf?fbclid=IwAR3TzdPTcLiOZN5r2dMd6_08l8kG0Mmr0mgP3TdzimpqB8H96T47ECBUfTM). "
        f"However, we've adjusted them according to the [maximum mortality rate recorded in Wuhan](https://wwwnc.cdc.gov/eid/article/26/6/20-0233_article)"
        f" when your country's hospitals are overwhelmed: if more people who need them lack hospital beds, more people will die."
    )

    st.write("<hr>", unsafe_allow_html=True)
    st.write(
        "Like this? [Click here to share it on Twitter](https://ctt.ac/u5U39), and "
        "[let us know your feedback via Google Form](https://forms.gle/J6ZFFgh4rVQm4y8G7)"
    )
    utils.insert_github_logo()
def run_app():
    """Render the Australia COVID-19 Simulator page from top to bottom.

    The statements run in display order: logo and header, sidebar selection,
    one-week forecast chart, SIR-based infection forecast, hospital-bed
    comparison, predicted deaths, contact form and credits.
    """
    ############################### Data Load ##################################
    css.hide_menu()
    css.limit_plot_size()

    # Get cached country data; refetch after clearing the cache when stale
    countries = _fetch_country_data()
    global_data = _fetch_global_data()
    if countries.stale:
        st.caching.clear_cache()
        countries = _fetch_country_data()
    if global_data.stale:
        st.caching.clear_cache()
        global_data = _fetch_global_data()

    ################## Heading Section ####################################
    utils.img_html(
        alt_text='Fractal',
        href='https://fractal.ai',
        src='https://i2.wp.com/fractal.ai/wp-content/uploads/2018/02/header-black-logo.png?fit=126%2C43&ssl=1',
        attributes=dict(width=125, height=43, target='_blank'))
    st.markdown(
        body=generate_html(text="Australia COVID-19 Simulator",
                           bold=True,
                           tag="h1"),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            tag="h2",
            text="A tool to help you visualize the impact of social distancing <br>",
        ),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            text="<strong>Disclaimer:</strong> <em>The creators of this application are not healthcare professionals. "
            "The illustrations provided were estimated using best available data but might not accurately reflect reality.</em>",
            color="gray",
            font_size="12px",
        ),
        unsafe_allow_html=True,
    )

    ### Add side bar
    sidebar = Sidebar(countries)

    ###################### historical and forecast chart ##############################
    ### Get selected country
    country = sidebar.country
    country_data = countries.country_data[country]
    _historical_df = countries.historical_country_data

    # "Australia" gets the history built by make_historical_data; any other
    # selection uses its own rows of the historical table.
    is_whole_country = country == "Australia"
    if is_whole_country:
        historical_data_custom = data_utils.make_historical_data(
            _historical_df)
    else:
        historical_data_custom = _historical_df.loc[_historical_df.index ==
                                                    country]

    # Both stay None when forecasting fails, so the section below can test
    # for that explicitly instead of relying on a NameError.
    historical_plot_df = None
    fig = None
    try:
        forecasted_data = forecast_utils.get_forecasts(
            country, constants.FORECAST_HORIZON)
        historical_plot_df = data_utils.prep_plotting_data(
            forecasted_data, historical_data_custom)
        fig = graphing.plot_time_series_forecasts(
            historical_plot_df,
            country_flag=is_whole_country,
            country_name=country)
    except Exception as exc:
        # Best effort: the rest of the page is still rendered.
        print("Error", exc)

    number_cases_confirmed = country_data["Confirmed"]
    population = country_data["Population"]
    num_hospital_beds = country_data["Num Hospital Beds"]
    age_data = constants.AGE_DATA.loc[constants.AGE_DATA["State"] ==
                                      country, :]

    st.subheader(
        f"How is the disease likely to spread in {country} in the next week?")

    # Estimate true cases
    true_cases_estimator = models.TrueInfectedCasesModel(
        constants.ReportingRate.default)

    if fig is None:
        st.markdown(
            "Something went wrong :( Forecasts unavailable. Contact admin")
    else:
        try:
            # Last row of the plotting frame
            week1_est = historical_plot_df.tail(1)
            reported_vs_true_cases(
                int(number_cases_confirmed),
                week1_est["confirmed"].tolist()[0],
                graphing.abbreviate(week1_est["lower_bound"].tolist()[0],
                                    round_factor=0),
                graphing.abbreviate(week1_est["upper_bound"].tolist()[0],
                                    round_factor=0))
            # Plot historical data
            st.write(fig)
            print("Historical Data with Forecasts plotted")
        except Exception as exc:
            print(exc)
            st.markdown(
                "Something went wrong :( Forecasts unavailable. Contact admin")

    ###################### SIR Model and Simulator ##############################
    # Predict infection spread
    sir_model = models.SIRModel(
        transmission_rate_per_contact=constants.TransmissionRatePerContact.default,
        contact_rate=sidebar.contact_rate,
        recovery_rate=constants.RecoveryRate.default,
        normal_death_rate=constants.MortalityRate.default,
        critical_death_rate=constants.CriticalDeathRate.default,
        hospitalization_rate=constants.HospitalizationRate.default,
        hospital_capacity=num_hospital_beds,
    )
    df = models.get_predictions(cases_estimator=true_cases_estimator,
                                sir_model=sir_model,
                                num_diagnosed=number_cases_confirmed,
                                num_recovered=country_data["Recovered"],
                                num_deaths=country_data["Deaths"],
                                area_population=population,
                                max_days=sidebar.num_days_for_prediction)

    st.subheader("How will my actions affect the spread?")
    st.write(
        "**Use the slider in the sidebar to see how this changes the dynamics of disease spread**"
    )

    df_base = df[~df.Status.isin(
        ["Need Hospitalization", "Recovered", "Susceptible"])]
    base_graph = graphing.infection_graph(df_base, df_base.Forecast.max(),
                                          population * 0.5, population * 0.75)
    # st.warning(graph_warning)
    st.write(base_graph)
    print("Infections Graph Plotted")

    ###################### Effect on hospitals ##############################
    st.subheader("How will this affect my healthcare system?")

    # Do some rounding to avoid beds sounding too precise!
    approx_num_beds = round(num_hospital_beds / 100) * 100
    st.write(
        f"{country} has around **{approx_num_beds:,}** beds. Bear in mind that most of these "
        "are probably already in use for people sick for other reasons.")

    peak_occupancy = df.loc[df.Status ==
                            "Need Hospitalization"]["Forecast"].max()
    # Guard the ratio: when nobody is forecast to need a bed, everyone who
    # needs one has access, and dividing by a zero peak must be avoided.
    if peak_occupancy > 0:
        percent_beds_at_peak = min(100 * num_hospital_beds / peak_occupancy,
                                   100)
    else:
        percent_beds_at_peak = 100

    num_beds_comparison_chart = graphing.num_beds_occupancy_comparison_chart(
        num_beds_available=approx_num_beds,
        max_num_beds_needed=peak_occupancy)
    st.write(num_beds_comparison_chart)

    st.markdown(
        f"At peak, **{int(peak_occupancy):,}** people will need hospital beds. At least ** {100 - percent_beds_at_peak:.1f}% ** of "
        f" people who need a bed in hospital might not have access given historical resources of {country}."
    )

    ###################### Death Charts ##############################
    st.subheader("How severe will the impact be?")

    # Final forecast values for the Dead and Recovered series
    num_dead = df[df.Status == "Dead"].Forecast.iloc[-1]
    num_recovered = df[df.Status == "Recovered"].Forecast.iloc[-1]

    glob_hist = global_data.historical_country_data
    uk_data = glob_hist.loc[(glob_hist.index == "UK") |
                            (glob_hist.index == "United Kingdom"), :].copy()
    uk_death_mirror = get_uk_death_mirror(uk_data, country_data["Deaths"])
    death_plot = graphing.plot_death_timeseries(df[df.Status == "Dead"],
                                                uk_death_mirror,
                                                country_name=country)

    st.markdown(
        f"If the average person in {country} adopts the selected behavior, we estimate that **{int(num_dead):,}** "
        f"people will die.")
    st.markdown("This graph illustrates predicted deaths.")

    # NOTE(review): the age-segregated figure is built but never written to
    # the page; the calls are kept as they were — confirm whether the chart
    # should be displayed or the calls dropped.
    outcomes_by_age_group = models.get_status_by_age_group(
        num_dead, num_recovered, age_data)
    fig = graphing.age_segregated_mortality(
        outcomes_by_age_group.loc[:, ["Dead"]])

    st.write(death_plot)
    print("Deaths Graph Plotted")

    st.markdown("-------")
    st.markdown(body=generate_html("We'd love to hear from you!",
                                   tag='h2',
                                   color='#0090c4'),
                unsafe_allow_html=True)
    user_input = st.text_input("Enter your email to contact us")
    utils.contact_us(user_input)

    ###################### Credits ##############################
    st.subheader("References and Credits:")
    st.markdown(
        body=generate_html(
            tag="h4",
            text=f"<u><a href=\"{NOTION_MODELLING_DOC}\" target=\"_blank\" style=color:{COLOR_MAP['pink']};>"
            "Methodology</a></u> <span>  </span>"
            "<hr>",
        ),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            tag="h4",
            text=f"<u><a href=\"https://github.com/CSSEGISandData/COVID-19\" target=\"_blank\" style=color:{COLOR_MAP['pink']};>"
            "2019 Novel Coronavirus COVID-19 (2019-nCoV) Data Repository by Johns Hopkins CSSE</a></u> <span>  </span>"
            "<hr>",
        ),
        unsafe_allow_html=True,
    )
    print("complete....")
def run_app():
    """Render the Corona Calculator page from top to bottom.

    The statements run in display order: header and disclaimer, sidebar
    selection, reported-versus-estimated cases, infection forecast from the
    SIR model, and the hospital-bed occupancy section.
    """
    css.hide_menu()
    css.limit_plot_size()

    # Get cached country data; refetch after clearing the cache when stale
    countries = _fetch_country_data()
    if countries.stale:
        st.caching.clear_cache()
        countries = _fetch_country_data()

    st.markdown(
        body=generate_html(text="Corona Calculator", bold=True, tag="h1"),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            tag="h2",
            text="A tool to help you visualize the impact of social distancing <br>",
        ),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            text="<strong>Disclaimer:</strong> <em>The creators of this application are not healthcare professionals. "
            "The illustrations provided were estimated using best available data but might not accurately reflect reality.</em>",
            color="gray",
            font_size="12px",
        ),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            tag="h4",
            text=f"<u><a href=\"{NOTION_MODELLING_DOC}\" target=\"_blank\" style=color:{COLOR_MAP['pink']};>"
            "Methodology</a></u> <span>  </span>"
            f"<u><a href=\"{MEDIUM_BLOGPOST}\" target=\"_blank\" style=color:{COLOR_MAP['pink']};>"
            "Blogpost</a> </u>"
            "<hr>",
        ),
        unsafe_allow_html=True,
    )

    # Country selection and the figures that belong to it
    sidebar = Sidebar(countries)
    country = sidebar.country
    country_data = countries.country_data[country]
    number_cases_confirmed = country_data["Confirmed"]
    population = country_data["Population"]
    num_hospital_beds = country_data["Num Hospital Beds"]

    sir_model = models.SIRModel(
        transmission_rate_per_contact=constants.TransmissionRatePerContact.default,
        contact_rate=sidebar.contact_rate,
        recovery_rate=constants.RecoveryRate.default,
        normal_death_rate=constants.MortalityRate.default,
        critical_death_rate=sidebar.severe_mortality_rate,
        hospitalization_rate=constants.HospitalizationRate.default,
        hospital_capacity=num_hospital_beds,
    )
    true_cases_estimator = models.TrueInfectedCasesModel(
        constants.AscertainmentRate.default)

    df = models.get_predictions(
        true_cases_estimator,
        sir_model,
        number_cases_confirmed,
        population,
        sidebar.num_days_for_prediction,
    )

    reported_vs_true_cases(
        number_cases_confirmed,
        true_cases_estimator.predict(number_cases_confirmed))

    st.write(
        "The number of reported cases radically underestimates the true cases, because people do not show symptoms for "
        "several days, not everybody gets tested, and the tests take a few days to return results. "
        "The extent depends upon your country's testing strategy."
        " We estimated the above using numbers from Japan ([source](https://www.ncbi.nlm.nih.gov/pubmed/32033064))."
    )

    st.subheader("How will the disease spread?")
    st.write(
        "The critical factor for controlling spread is how many others infected people interact with each day. "
        "This has a dramatic effect upon the dynamics of the disease. ")
    st.write(
        "**Play with the slider to the left to see how this changes the dynamics of disease spread**"
    )

    df_base = df[~df.Status.isin(["Need Hospitalization", "Need Ventilation"])]
    base_graph = graphing.infection_graph(df_base, df.Forecast.max())
    st.warning(graph_warning)
    st.write(base_graph)

    # Built here, written further down in the healthcare section
    hospital_graph = graphing.hospitalization_graph(
        df[df.Status.isin(["Infected", "Need Hospitalization"])],
        num_hospital_beds,
        max(num_hospital_beds, df.Forecast.max()),
    )

    st.write(
        "Note that we use a fixed estimate of the mortality rate here, of 1% [(source)](https://institutefordiseasemodeling.github.io/nCoV-public/analyses/first_adjusted_mortality_estimates_and_risk_assessment/2019-nCoV-preliminary_age_and_time_adjusted_mortality_rates_and_pandemic_risk_assessment.html). "
        "In reality, the mortality rate will be highly dependent upon the load upon the healthcare system and "
        "the availability of treatment. Some estimates ([like this one](https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(20)30195-X/fulltext)) are closer to 6%."
    )

    st.subheader("How will this affect my healthcare system?")
    st.write(
        "The important variable for hospitals is the peak number of people who require hospitalization"
        " and ventilation at any one time.")

    # Do some rounding to avoid beds sounding too precise!
    st.write(
        f"Your country has around **{round(num_hospital_beds / 100) * 100:,}** beds. Bear in mind that most of these "
        "are probably already in use for people sick for other reasons.")
    st.write(
        "It's hard to know how many ventilators are present per country, but there will certainly be a worldwide "
        "shortage. Many countries are scrambling to buy them [(source)](https://www.reuters.com/article/us-health-coronavirus-draegerwerk-ventil/germany-italy-rush-to-buy-life-saving-ventilators-as-manufacturers-warn-of-shortages-idUSKBN210362)."
    )

    st.warning(graph_warning)
    st.write(hospital_graph)

    peak_occupancy = df.loc[df.Status ==
                            "Need Hospitalization"]["Forecast"].max()
    # Guard the ratio: when nobody is forecast to need a bed, everyone who
    # needs one has access, and dividing by a zero peak must be avoided.
    if peak_occupancy > 0:
        percent_beds_at_peak = min(100 * num_hospital_beds / peak_occupancy,
                                   100)
    else:
        percent_beds_at_peak = 100

    st.markdown(
        f"At peak, **{peak_occupancy:,}** people will need hospital beds. ** {percent_beds_at_peak:.1f} % ** of people "
        f"who need a bed in hospital will have access to one given your country's historical resources. This does "
        f"not take into account any special measures that may have been taken in the last few months."
    )
def train(mhc, data, model, model_path, lr, n_epoch, transfer_path):
    '''
    Training protocol

    Loads the CSV at `data`, keeps the rows for allele `mhc` and fits the
    architecture named by `model` for `n_epoch` single-epoch rounds. After
    each round the model is scored on its own training data and the weights
    are written to `model_path` whenever the training F1 improves.

    mhc: allele name passed to Dataset.get_allele
    data: path of the CSV file (columns 'mhc', 'peptide', 'IC50(nM)')
    model: one of 'fc', 'gru', 'lstm', 'chunky_cnn', 'spanny_cnn'
    model_path: path the best weights are saved to
    lr: learning rate handed to the Adam optimizer
    n_epoch: number of training rounds
    transfer_path: weights file to start from; skipped when falsy

    Raises ValueError when `model` is not a supported architecture name.
    '''

    # Fail fast on an unknown architecture: previously the name fell through
    # the elif chain and crashed later with an AttributeError on a str.
    supported_models = ('fc', 'gru', 'lstm', 'chunky_cnn', 'spanny_cnn')
    if model not in supported_models:
        raise ValueError('Unknown model %r, expected one of: %s'
                         % (model, ', '.join(supported_models)))

    # print out options
    print(
        'Training\nMHC: %s\nData: %s\nModel: %s\nSave path: %s\nTransfer: %s' %
        (mhc, data, model, model_path, transfer_path))

    # load training
    train_data = Dataset.from_csv(filename=data,
                                  sep=',',
                                  allele_column_name='mhc',
                                  peptide_column_name='peptide',
                                  affinity_column_name='IC50(nM)')

    # apply cut/pad or mask to same length
    if 'lstm' in model or 'gru' in model:
        train_data.mask_peptides()
    else:
        train_data.cut_pad_peptides()

    # get the allele specific data
    mhc_train = train_data.get_allele(mhc)

    # define model: every supported name maps to models.mhcnuggets_<name>
    model = getattr(models, 'mhcnuggets_' + model)()

    # check if we need to do transfer learning
    if transfer_path:
        model.load_weights(transfer_path)

    # compile model with the caller's learning rate (it used to be ignored
    # in favour of a hard-coded 0.001)
    model.compile(loss='mse', optimizer=Adam(lr=lr))

    # get tensorized values for training
    train_peptides, train_continuous, train_binary = mhc_train.tensorize_keras(
        embed_type='softhot')

    # convergence criterion
    highest_f1 = -1

    for epoch in range(n_epoch):

        # train
        model.fit(train_peptides, train_continuous, epochs=1, verbose=0)

        # test model on training data
        train_preds_cont, train_preds_bin = get_predictions(
            train_peptides, model)
        train_auc = roc_auc_score(train_binary, train_preds_cont)
        train_f1 = f1_score(train_binary, train_preds_bin)
        train_ktau = kendalltau(train_continuous, train_preds_cont)[0]
        print('epoch %d / %d' % (epoch, n_epoch))
        print('Train AUC: %.4f, F1: %.4f, KTAU: %.4f' %
              (train_auc, train_f1, train_ktau))

        # convergence: keep the weights of the best-scoring round
        if train_f1 > highest_f1:
            highest_f1 = train_f1
            model.save_weights(model_path)

    print('Done!')
# -*- coding: utf-8 -*-
"""
Main file for the second Machine Learning project: ### TODO ###

Authors: Kirill IVANOV, Matthias RAMIREZ, Nicolas TALABOT
"""

import argparse

from models import get_pretrained_models, train_and_get_models, get_predictions

# Command-line options: --train_model switches from the pre-trained models
# to training from scratch.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--train_model",
    action="store_true",
    help="train the whole model, do not reuse the pre-trained one")
args = parser.parse_args()

# Directories passed to the training and prediction steps
data_dir = 'training/'
test_data_dir = 'test_set_images/'

# Obtain the three models, either freshly trained or pre-trained
if args.train_model:
    model1, model2, model3 = train_and_get_models(data_dir)
else:
    model1, model2, model3 = get_pretrained_models()

# Run the predictions over the test images
get_predictions(model1, model2, model3, test_data_dir)