Example #1
0
def main_stats_page():
    """Render the main statistics dashboard.

    Loads the static country-level data and the CSSE deaths time series
    from the local CSV database, then draws headline totals, a
    deaths-over-time chart, two bar charts, a scatter plot, a formatted
    data table, and the data-source links.
    """
    data_dir = Path.cwd() / 'data'
    db = CSVDataBase(data_dir)

    # load data
    df = db.import_static_data()
    totals = df.sum()
    ts = db.get_csse_time_series_deaths()
    n = 15  # number of countries shown in the bar/scatter charts

    st.markdown(f":skull: reported deaths: `{totals['deaths']:,.0f}` ")
    st.markdown(
        f":male_zombie: reported recoveries: `{totals['recov']:,.0f}`  \n")
    st.markdown(
        f":face_with_thermometer: reported cases: `{totals['cases']:,.0f}` ")
    st.markdown("  \n  \n  \n  \n")

    # time series deaths plot
    st.markdown("#### :skull: deaths over time\n")
    st.markdown("  \n  \n  \n  \n  \n")
    countries = ['United Kingdom', 'Spain', 'US', 'Italy', 'France']
    # all_countries = ts['country'].unique().tolist()
    # st.sidebar.multiselect('select countries', all_countries, default=countries)
    chart = utilities.line_plot(ts, countries)
    st.altair_chart(chart)

    # bar plots
    chart = utilities.bar_chart(df, 'country', 'deaths', n=n)
    st.altair_chart(chart)
    chart = utilities.bar_chart(df, 'country', 'cases', n=n)
    st.altair_chart(chart)

    # scatter plot
    scatter = utilities.scatter_plot(df, 'cases_per_million', 'pop_density',
                                     'country', n)
    st.altair_chart(scatter)

    # data table
    # -1 was deprecated and removed in pandas 1.0; None means "no truncation"
    pd.set_option('display.max_colwidth', None)
    st.markdown('### data table\n', unsafe_allow_html=False)
    st.markdown(
        "*click column headers to sort*  :arrow_up_small::arrow_down_small:")
    formatted_df = df.style.format({
        "cases": "{:,.0f}",
        "deaths": "{:,.0f}",
        "recov": "{:,.0f}"
    })
    st.write(formatted_df)

    st.markdown(
        "sources  \n[wikipedia](https://en.wikipedia.org/wiki/Template:2019%E2%80%9320_coronavirus_pandemic_data)  \
        \n[world bank](http://api.worldbank.org/v2/en/indicator/EN.POP.DNST?downloadformat=csv) \
        \n[johns hopkins](https://github.com/CSSEGISandData/COVID-19) \
        \n[apple](https://www.apple.com/covid19/mobility) \
        ")
Example #2
0
# time series deaths plot
# (f-prefixes removed: these literals contain no placeholders)
st.markdown("#### :skull: deaths over time\n")
st.markdown("  \n  \n  \n  \n  \n")
countries = ['United Kingdom', 'Spain', 'US', 'Italy', 'France']
# all_countries = ts['country'].unique().tolist()
# st.sidebar.multiselect('select countries', all_countries, default=countries)
chart = utilities.line_plot(ts, countries)
st.altair_chart(chart)

# bar plots
chart = utilities.bar_chart(df, 'country', 'deaths', n=n)
st.altair_chart(chart)
chart = utilities.bar_chart(df, 'country', 'cases', n=n)
st.altair_chart(chart)

# scatter plot
scatter = utilities.scatter_plot(df, 'cases_per_million', 'pop_density',
                                 'country', n)
st.altair_chart(scatter)

# data table
# -1 was deprecated and removed in pandas 1.0; None means "no truncation"
pd.set_option('display.max_colwidth', None)
st.markdown('### data table\n', unsafe_allow_html=False)
st.markdown(
    "*click column headers to sort*  :arrow_up_small::arrow_down_small:")
formatted_df = df.style.format({
    "cases": "{:,.0f}",
    "deaths": "{:,.0f}",
    "recov": "{:,.0f}"
})
st.write(formatted_df)

st.markdown(
Example #3
0
def xtb_conformers(cage,
                   cage_name,
                   etemp,
                   output_dir,
                   conformer_dir,
                   opt=False,
                   opt_level=None,
                   solvent=None):
    """Rank xyz conformers of ``cage`` by xTB energy and keep the lowest.

    Optionally GFN2-xTB-optimises each conformer first, then computes a
    single-point energy per conformer, writes the minimum-energy
    structure as .mol/.xyz/.json, and saves a relative-energy scatter
    plot to ``output_dir``.

    Parameters
    ----------
    cage : stk molecule; updated in place from each conformer file.
    cage_name : str prefix used for all output file names.
    etemp : electronic temperature passed to xTB.
    output_dir : directory for optimised structures and the energy plot.
    conformer_dir : directory containing ``conf_*.xyz`` files.
    opt : if True, optimise each conformer before the energy calculation.
    opt_level : xTB optimisation level (only used when ``opt`` is True).
    solvent : None for gas phase, otherwise a (solvent_name, grid) tuple.

    Raises
    ------
    ValueError
        If no ``conf_*.xyz`` files are found in ``conformer_dir``.
    """
    if not exists(output_dir):
        os.mkdir(output_dir)

    if solvent is None:
        solvent_str = None
        solvent_grid = 'normal'
    else:
        solvent_str, solvent_grid = solvent

    print('doing XTB conformer sorting by energy')
    conformers = glob.glob(f'{conformer_dir}/conf_*.xyz')
    if not conformers:
        # Previously an empty glob fell through to a NameError on
        # min_energy_conformer below; fail early with a clear message.
        raise ValueError(f'no conf_*.xyz files found in {conformer_dir}')
    ids = []
    energies = []
    min_energy = float('inf')  # was 10E20; inf is the idiomatic sentinel
    min_energy_conformer = None
    for file in sorted(conformers):
        # conformer id is the trailing "_<id>" part of the file name;
        # renamed from `id` to avoid shadowing the builtin
        conf_id = file.replace('.xyz', '').split('_')[-1]
        cage.update_from_file(file)
        if opt:
            print(f'optimising conformer {conf_id}')
            xtb_opt = stk.XTB(
                xtb_path='/home/atarzia/software/xtb-190806/bin/xtb',
                output_dir=f'opt_{cage_name}_{conf_id}',
                gfn_version=2,
                num_cores=6,
                opt_level=opt_level,
                charge=4,
                num_unpaired_electrons=0,
                max_runs=1,
                electronic_temperature=etemp,
                calculate_hessian=False,
                unlimited_memory=True,
                solvent=solvent_str,
                solvent_grid=solvent_grid)
            xtb_opt.optimize(mol=cage)
            cage.write(join(f'{output_dir}', f'conf_{conf_id}_opt.xyz'))

        print(f'calculating energy of {conf_id}')
        # Extract energy.
        xtb_energy = stk.XTBEnergy(
            xtb_path='/home/atarzia/software/xtb-190806/bin/xtb',
            output_dir=f'ey_{cage_name}_{conf_id}',
            num_cores=6,
            charge=4,
            num_unpaired_electrons=0,
            electronic_temperature=etemp,
            unlimited_memory=True,
            solvent=solvent_str,
            solvent_grid=solvent_grid)
        energy = xtb_energy.get_energy(cage)
        if energy < min_energy:
            min_energy_conformer = file
            min_energy = energy
        ids.append(conf_id)
        energies.append(energy)

    print('done', min_energy, min_energy_conformer)
    # reload and persist the lowest-energy conformer under the cage name
    cage.update_from_file(min_energy_conformer)
    cage.write(f'{cage_name}_optc.mol')
    cage.write(f'{cage_name}_optc.xyz')
    cage.dump(f'{cage_name}_optc.json')

    # energies relative to the minimum, converted from Hartree to kJ/mol
    energies = [(i - min(energies)) * 2625.5 for i in energies]
    fig, ax = scatter_plot(X=ids,
                           Y=energies,
                           xtitle='conformer id',
                           ytitle='rel. energy [kJmol$^{-1}$]',
                           xlim=(0, 201),
                           ylim=(-5, 1000))

    fig.tight_layout()
    fig.savefig(join(output_dir, f'{cage_name}_conf_energies.pdf'),
                dpi=720,
                bbox_inches='tight')
    plt.close()
# **Question**: Play with the parameters (*variance*, *scale* and *period*) and see how they affect the plot.

# In[3]:

sample_size = 300
variance = 10  # Variance of the Gaussian noise
scale = 100  # Range
period = 6  # Simulations are based on a cosine function (see the data_simulation function)

# 70% / 30% train/test split of the simulated sample
x_train, y_train = data_simulation(int(.7 * sample_size), scale, period,
                                   variance)
x_test, y_test = data_simulation(int(.3 * sample_size), scale, period,
                                 variance)

# NOTE(review): rebinding `plt` would shadow matplotlib.pyplot if it was
# imported under that name — confirm against this file's imports
plt = scatter_plot(
    x_train, x_test, y_train,
    y_test)  # The scatter_plot function is in the utilities script

# ### 2.1.2 Getting - visual - intuition about models' capacity
#
# As seen in class (Slide 38 for example), the higher the model capacity, the better it will fit the training data set (caution though, fitting well the training data does not necessarily lead to good generalization). Here, we use [polynomial regression](https://en.wikipedia.org/wiki/Polynomial_regression) to fit the training set (don't worry, the purpose of the tutorial is not to understand polynomial regression). Note however that the greater is the polynomial degree, the higher is the model capacity.
#
# **Questions**:
# 1. Observe how the fitted curves behave with respect to their polynomial degree.
# 2. Would you prefer to fit the data points with polynomial regression of degree 25 or 100?
# 3. Which of these curves should have the best generalization error?

# In[6]:

degree = [
    0, 1, 3, 5, 10, 20, 150
Example #5
0
# **Question**: Variez les paramètres (*variance*, *scale* et *period*) et voyez comment ils changent la figure ci-dessous.

# In[3]:

sample_size = 300
variance = 10  # Variance of the Gaussian noise
scale = 100  # Range
period = 6  # The simulation is based on the cosine function (see the data_simulation function)

# 70% / 30% train/test split of the simulated sample
x_train, y_train = data_simulation_(int(.7 * sample_size), scale, period,
                                    variance)
x_test, y_test = data_simulation_(int(.3 * sample_size), scale, period,
                                  variance)

# This function lives in the utilities.py file
plt = scatter_plot(x_train, x_test, y_train, y_test)

# ### 2.1.2 Obtenir une première intuition visuelle de la capacité du modèle
#
# Comme vu dans le cours (par exemple diapo 38), plus haute est la capacité du modèle, meilleur le modèle sera sur l'ensemble d'entraînement (attention, encore une fois, ça ne dit rien sur sa capacité à généraliser). Pour l'instant, nous entraînerons un modèle de [régression polynomiale](https://fr.wikipedia.org/wiki/R%C3%A9gression_polynomiale).
# L'avantage de ce modèle est que nous pouvons facilement changer sa capacité en augmentant le degré du polynôme $m$:
#
# $$\hat{y} = \sum_{i=1}^m w_i x^i $$
#
# mais ce n'est pas très important de comprendre les détails du modèle.
#
# **Questions**:
# 1. Observez l'effet du degré du polynôme sur sa capacité à prédire les données.
# 2. À votre avis, vaut-il mieux utiliser un polynôme de degré 20 ou 50?
# 3. Lesquels de ces polynômes devraient avoir la meilleure erreur de généralisation?
#