Exemple #1
0
def print_md(value):
    """Render ``value`` as Markdown in the notebook output."""
    rendered = Markdown(value)
    display(rendered)
Exemple #2
0
def print_source(obj):
    """Display the source code of ``obj`` as a Python-fenced Markdown block.

    The source text is retrieved with ``inspect.getsource``.
    """
    code = inspect.getsource(obj)
    fenced = '```python\n' + code + '\n```'
    display(Markdown(fenced))
Exemple #3
0
def random_derivatives():
    """Display six randomly parameterised differentiation exercises, (a)-(f).

    For every exercise a fresh set of integers is drawn with
    ``np.random.randint(2, 7, ...)``. The exercise text (Dutch, with inline
    LaTeX) is shown once as rendered Markdown and once as raw text inside a
    collapsible ``<details><pre>`` element.
    """
    def ho(x):
        # Render a value of 1 as an empty string; any other value is kept.
        return "" if x == 1 else x

    def show(prefix, text):
        # Rendered exercise first, then the raw text in a collapsible block.
        display(Markdown(prefix + text))
        display(Markdown("<details><pre>" + text + "</pre></details>"))

    a, b, c = np.random.randint(2, 7, 3)
    show(
        "**(a)** ",
        f"Gegeven $f(x) = ({a-1}- {ho(b)}x)^{ho(c)}$, bepaal $f^\\prime(x)$",
    )

    a, b, c, d = np.random.randint(2, 7, 4)
    show(
        "**(b)** ",
        f"Gegeven $g(x) = {ho(a-1)}x^{ho(b)}\\ \\text{{tan}}({ho(c)}x^{ho(d)})$, geef $g^\\prime(x)$",
    )

    a, b, c, d = np.random.randint(2, 7, 4)
    show(
        "**(c)** ",
        f"Gegeven $h(x) = \\text{{log}}_{a}({b-1}x-{c}x^{d})$, geef $h^\\prime(x)$",
    )

    a, b = np.random.randint(2, 7, 2)
    show(
        "**(d)** ",
        f"Gegeven $k(x) = \\frac{{{a}}}{{x^{b}}}$, geef $k^{{\\prime\\prime}}(x)$",
    )

    a, b = np.random.randint(2, 7, 2)
    show(
        "**(e)** ",
        f"Gegeven $\\frac{{dy}}{{dx}} = x^{a} - {ho(b-1)}y$, geef $\\frac{{d^2y}}{{dx^2}}$",
    )

    a, b, c, d = np.random.randint(2, 7, 4)
    show(
        "**(f)** ",
        f"Gegeven ${ho(a-1)}x^3y - {ho(b-1)}x^2 + {ho(c-1)}y^4 = {2*d}$, geef $\\frac{{dy}}{{dx}}$",
    )
Exemple #4
0
def print_h3(value):
    """Display ``str(value)`` as a level-3 Markdown heading."""
    heading = "### " + str(value)
    display(Markdown(heading))
Exemple #5
0
def latex_formula(form, details=True):
    """Display the simplified ``form`` as rendered LaTeX.

    ``form.simplify().to_latex(outer=True)`` supplies the LaTeX text; nothing
    is displayed when that text is empty. With ``details`` true, the raw
    LaTeX is additionally shown inside a collapsible ``<details>`` element.
    """
    tex = form.simplify().to_latex(outer=True)
    if not tex:
        return
    display(Math(tex))
    if not details:
        return
    display(Markdown("<details><pre>$" + tex + "$</pre></details>"))
def printmd(string):
    """Display ``string`` rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)
def merge_dictionary(pair, v_in):
    """Return a copy of ``v_in`` with every occurrence of ``pair`` merged.

    Each key of ``v_in`` is a string of whitespace-separated symbols. Wherever
    the two symbols of ``pair`` appear next to each other as whole symbols
    (delimited by whitespace or the string boundary), they are replaced by
    their concatenation. Values are carried over unchanged; if two keys
    collapse to the same merged key, the later one wins.
    """
    merged_symbol = ''.join(pair)
    # The look-arounds ensure only complete symbols match, never a fragment
    # of a longer symbol.
    pattern = re.compile(
        r'(?<!\S)' + re.escape(' '.join(pair)) + r'(?!\S)')
    return {
        pattern.sub(merged_symbol, word): count
        for word, count in v_in.items()
    }


# Learned merges: pair -> iteration index at which it was merged.
bpe_codes = {}
# Reverse lookup: merged symbol -> the pair it was built from.
bpe_codes_reverse = {}

for i in range(num_merges):
    display(Markdown("### Iteration {}".format(i + 1)))
    # NOTE(review): get_stats is defined elsewhere; it is used here as a
    # mapping from symbol pairs to a score (``pairs.get`` is the sort key).
    pairs = get_stats(dictionary)
    if not pairs:
        # No pair left to merge; max() would raise ValueError on an empty
        # mapping, so stop early instead.
        print("no more pairs to merge")
        break
    best = max(pairs, key=pairs.get)
    dictionary = merge_dictionary(best, dictionary)

    bpe_codes[best] = i
    bpe_codes_reverse[best[0] + best[1]] = best

    print("new merge: {}".format(best))
    print("dictionary: {}".format(dictionary))
# -

print(bpe_codes)

# ## SentencePiece
Exemple #8
0
def printmd(S):
    """Display ``S`` rendered as Markdown; returns ``None``."""
    display(Markdown(S))
Exemple #9
0
def ImportData(year,
               resolution,
               data_dir,
               land=False,
               stats=False,
               drop_na=False):
    """
    Load hexagon centroids, IGBP land cover fractions and bioclimate variables
    and merge them on the 'HID' column into a single geodataframe.

    INPUT:
     - year -> Year of data collection. (string)
     - resolution -> Data resolution. (string)
     - data_dir -> Dataset directory. (path string)
     - land -> Choose whether or not to import only data from land hexagons with
             at least one bioclimate variable available. (bool)
     - stats -> Choose whether or not to display some statistics about the data.
             (bool)
     - drop_na -> Choose whether or not to drop rows with any NA value (-100)
             in the merged data. (bool)

    OUTPUT:
     - hexagons -> Geodataframe containing all the available selected data.
    """
    # Sentinel used in the source files for "not available".
    NA_VALUE = -100

    # Import hexagon centroids coordinate in a geodataframe
    file_dir = os.path.join(
        data_dir, 'Spatial',
        f'Centroids_ISEA3H{resolution}_Geodetic_V_WGS84.shp')
    centroids = gpd.read_file(file_dir)

    # Import IGBP land cover class fractions for hexagons in a dataframe
    file_dir = os.path.join(
        data_dir, f'ISEA3H{resolution}_MCD12Q1_V06_Y{year}_IGBP_Fractions.txt')
    LC = pd.read_csv(file_dir, sep='\t')

    # Import bioclimate variables for hexagons in a dataframe
    file_dir = os.path.join(
        data_dir, f'ISEA3H{resolution}_WorldClim30AS_V02_BIO_Centroid.txt')
    BV = pd.read_csv(file_dir, sep='\t')

    # Merge IGBP land cover fractions, bioclimate variables and hexagon
    # coordinates
    hexagons = centroids.merge(LC, on='HID')
    hexagons = hexagons.merge(BV, on='HID')
    n = hexagons.shape[0]

    if land:
        # Identify hexagons composed of water bodies only: they are the only
        # ones for which the sum of all land cover class fractions is 0.
        # The first column is skipped (assumed to be the 'HID' key).
        LC_rowsums = LC.iloc[:, 1:].sum(axis=1)
        LC_water_idx = LC_rowsums[LC_rowsums == 0].index.values
        nw = len(LC_water_idx)

        # Identify hexagons not only composed of water bodies.
        LC_land_idx = np.setdiff1d(np.arange(n), LC_water_idx)
        nl = len(LC_land_idx)

        # Identify hexagons that have no bioclimate variable available.
        # A vectorised comparison replaces the former eval()-built expression,
        # which failed for column names that are not valid attribute names.
        mask_NA_rows = (BV.iloc[:, 1:] == NA_VALUE).all(axis=1)
        BV_NA_idx = BV[mask_NA_rows].index.values
        n_na = len(BV_NA_idx)

        # Identify hexagons that have at least one bioclimate variable available
        BV_A_idx = np.setdiff1d(BV.index.values, BV_NA_idx)
        n_a = len(BV_A_idx)

        # Identify hexagons which satisfy both desired conditions
        land_A_idx = np.intersect1d(BV_A_idx, LC_land_idx)
        nl_a = len(land_A_idx)

        # Extract land hexagons with at least one bioclimate variable available.
        # NOTE(review): row labels of LC/BV are used as positions in the merged
        # frame; this assumes the merges keep every row in the same order —
        # confirm against the data.
        hexagons = hexagons.iloc[land_A_idx].copy()

    if drop_na:
        # Identify and remove rows with at least one value not available
        hexagons.replace(to_replace=NA_VALUE, value=np.nan, inplace=True)
        hexagons.dropna(axis='index', how='any', inplace=True)
        n_aa = hexagons.shape[0]

    if stats:
        str2display = f"The entire dataset is composed of {n} hexagons."
        if land:
            str2display = str2display + f"""<ul>
            <li> {nl} correspond to lands ({100*nl/n:.4}% of the entire dataset);
            <li> {nw} correspond to water bodies ({100*nw/n:.4}% of the entire 
            dataset);
            <li> {n_a} have at least one bioclimate variable available
            ({100*n_a/n:.4}% of the entire dataset);
            <li> {n_na} have no bioclimate variable available
            ({100*n_na/n:.4}% of the entire dataset).</ul>
            When considering only the land hexagons: <ul>
            <li> {nl_a} hexagons have at least one bioclimate variable available 
            ({100*nl_a/nl:.4}% of the land hexagons and {100*nl_a/n:.4}% of the 
            entire dataset);"""
            if drop_na:
                str2display = str2display + f"""
                <li> {n_aa} hexagons have all the bioclimate variables available
                ({100*n_aa/nl:.4}% of the land hexagons and {100*n_aa/n:.4}% of 
                the entire dataset) </ul>"""
        elif drop_na:
            str2display = str2display + f"""<br>
            Only {n_aa} hexagons have all the bioclimatic variables available
            ({100*n_aa/n:.4}% of the entire dataset)."""
        display(Markdown(str2display))

    return hexagons
Exemple #10
0
def pandas_df_to_Markdown_table(df):
    """Display ``df`` as a pipe-delimited Markdown table.

    A row of ``---`` cells is placed above the data so that, once the frame
    is serialised with ``|`` as the separator, it follows the header line.
    """
    separator_row = pd.DataFrame([['---'] * len(df.columns)],
                                 columns=df.columns)
    table = pd.concat([separator_row, df])
    display(Markdown(table.to_csv(sep='|', index=False)))
Exemple #11
0
def EmpiricalDistribution(data,
                          NA_val=-100,
                          figsize=(15, 8),
                          show=True,
                          save=False,
                          plot_dir='Output/Plots',
                          title='ED',
                          save_params=None):
    """
    Plot a histogram, a kernel density estimate and a boxplot of ``data``.

    INPUT:
     - data -> Data vector (Series or array-like supporting boolean masking)
     - NA_val -> Value used for NA; matching entries are removed before
             plotting.
     - figsize -> Size of the figure object. (tuple of int: (width, height))
     - show -> Choose whether or not to display the plot. (bool)
     - save -> Choose whether or not to save the plot. (bool)
     - plot_dir -> Plot saving directory; created if missing. (path string)
     - title -> Name of the plot file without file extension. (string)
     - save_params -> Parameters forwarded to ``savefig``; defaults to
             ``{'format': 'jpg'}``. (dict or None)
    """

    # Default saving parameters, overridden by the user's choices.
    # ``save_params`` defaults to None to avoid a shared mutable default.
    SP = {'format': 'jpg'}
    SP.update(save_params or {})

    # Remove NA values if needed
    NA_mask = data != NA_val
    data = data[NA_mask]
    n_NA = (~NA_mask).sum()
    if n_NA > 0:
        display(Markdown(f"There are {n_NA} not avalable data points"))

    # Layout: histogram and density side by side, boxplot across the bottom.
    fig = plt.figure(figsize=figsize)
    gs = plt.GridSpec(nrows=2, ncols=2, figure=fig)
    ax0 = fig.add_subplot(gs[0, 0])
    ax1 = fig.add_subplot(gs[0, 1])
    ax2 = fig.add_subplot(gs[1, :])

    # Frequency histogram; the bin width is 1/100 of the data range
    bw = (data.max() - data.min()) / 100
    sns.histplot(data=data, color='darkblue', binwidth=bw, ax=ax0)
    ax0.set(xlabel='', ylabel='Counts')

    # Density estimate
    sns.kdeplot(x=data, color='darkblue', fill=True, ax=ax1)
    ax1.set(xlabel='', ylabel='Density')

    # Boxplot
    sns.boxplot(x=data, color='skyblue', fliersize=3, ax=ax2)
    ax2.set_xlabel('')

    fig.tight_layout()

    # Save the plot if needed
    if save:
        # Make sure the target directory exists before writing into it.
        if plot_dir:
            os.makedirs(plot_dir, exist_ok=True)
        file_dir = os.path.join(plot_dir, f"{title}.{SP['format']}")
        # Save this figure explicitly rather than "the current figure".
        fig.savefig(file_dir, **SP)
    # Prevent display of the plot if needed
    if not show:
        plt.close(fig)
Exemple #12
0
# 
# ![training conditional proportions](./images/training_conditional_proportions.png "Class Conditional Proportions")
# 
# Hint:
# - You can use DataFrame's `.melt` method to "unpivot" a DataFrame. See the following code cell for an example.

# In[52]:


from IPython.display import display, Markdown
# Toy frame for the `melt` demonstration: two 0/1 word columns plus a `type`
# label column, one row per sentence.
df = pd.DataFrame({
    'word_1': [1, 0, 1, 0],
    'word_2': [0, 1, 0, 1],
    'type': ['spam', 'ham', 'ham', 'ham']
})
# Show the explanation, then the original (wide) frame.
display(Markdown("> Our Original DataFrame has some words column and a type column. You can think of each row as a sentence, and the value of 1 or 0 indicates the number of occurances of the word in this sentence."))
display(df);
# Show the explanation, then the frame unpivoted with `type` as the id column.
display(Markdown("> `melt` will turn columns into variale, notice how `word_1` and `word_2` become `variable`, their values are stored in the value column"))
display(df.melt("type"))


# ### Question 3a
# 
# Create a bar chart like the one above comparing the proportion of spam and ham emails containing certain words. Choose a set of words that are different from the ones above, but also have different proportions for the two classes. Make sure to only consider emails from `train`.
# 
# <!--
# BEGIN QUESTION
# name: q3a
# points: 2
# manual: true
# image: true
Exemple #13
0
def html_hello_world():
    """Display the contents of ``minimal.html`` as an HTML-fenced Markdown block.

    The file is read from the current working directory.

    Raises:
        OSError: if ``minimal.html`` cannot be opened.
    """
    from IPython.display import display, Markdown
    # The context manager closes the file handle as soon as the text is read;
    # the previous bare open().read() left it to the garbage collector.
    with open("minimal.html") as html_file:
        txt = html_file.read()
    md = "\n".join(["```HTML", txt, "```"])
    display(Markdown(md))
Exemple #14
0
def printmd(txt: str = ""):
    """Display ``txt`` rendered as Markdown (empty string by default)."""
    rendered = Markdown(txt)
    display(rendered)
Exemple #15
0
 def inputs(self):
     """Show the inputs grid and, when it has any entries, the loaders grid.

     Both grids are created with ``qgrid.show_grid`` with editing disabled.
     """
     inputs_grid = qgrid.show_grid(self.info.in_df,
                                   grid_options={'editable': False})
     display(Markdown('## INPUTS'), inputs_grid)
     if not self.info.loader_df.size:
         return
     loaders_grid = qgrid.show_grid(self.info.loader_df,
                                    grid_options={'editable': False})
     display(Markdown('## LOADERS'), loaders_grid)
Exemple #16
0
def colab_link(path):
    "Display a Markdown link that opens the notebook at `path` on Colab"
    # The URL is assembled from the project configuration returned by
    # get_config() (user, library name, branch and notebooks folder name).
    cfg = get_config()
    url = f'https://colab.research.google.com/github/{cfg.user}/{cfg.lib_name}/blob/{cfg.branch}/{cfg.path("nbs_path").name}/{path}.ipynb'
    link = f'[Open `{path}` in Colab]({url})'
    display(Markdown(link))
# Create a folium map at the given coordinates with the "CartoDb dark_matter"
# tiles and an initial zoom level of 10.
m = folium.Map([-20.0760232, 34.3582913909869],
               zoom_start=10,
               tiles="CartoDb dark_matter")
# Pair the `lat` and `lon` columns of each GeoDataFrame into (lat, lon) tuples.
# These are one-shot zip iterators: they are exhausted by the loops below.
locs_dtm_short = zip(dtm_short_gdf.lat, dtm_short_gdf.lon)
locs_hsio_short = zip(hsio_short_gdf.lat, hsio_short_gdf.lon)
# Locations from dtm_short_gdf: larger red markers.
for location in locs_dtm_short:
    folium.CircleMarker(location=location, color="red", radius=4).add_to(m)
# Locations from hsio_short_gdf: smaller white markers.
for location in locs_hsio_short:
    folium.CircleMarker(location=location, color="white", radius=2).add_to(m)
#m.save("map1.html")
# Bare expression: presumably the last expression of a notebook cell, so the
# map is rendered as the cell output.
m

# %%
# Bare expression (presumably rendered as the notebook cell's output):
# a one-line summary of how many entries `dtm_short` and `hsio_short` hold.
Markdown(
    f" There are {len(dtm_short)} DTM tracked displacement sites in Mozambique and {len(hsio_short)} health facilities"
)


# %%
# calculate the nearest health facility from each site
def calculate_nearest(row, destination, val, col="geometry"):
    """Return the ``val`` entry of the ``destination`` row nearest to ``row``.

    The geometry stored in ``row[col]`` is compared against the union of all
    ``destination`` geometries via ``nearest_points``. The first
    ``destination`` row whose geometry equals the nearest point supplies the
    returned value.
    """
    all_targets = destination["geometry"].unary_union
    # nearest_points returns a pair; the second element is the point on the
    # destination side.
    _, closest = nearest_points(row[col], all_targets)
    is_closest = destination.geometry == closest
    return destination.loc[is_closest][val].to_numpy()[0]


dtm_short_gdf["nearest_geom"] = dtm_short_gdf.apply(calculate_nearest,
                                                    destination=hsio_short_gdf,
    def __init__(self,
                 image,
                 fig_xsize=None,
                 fig_ysize=None,
                 cmap=plt.cm.gist_gray,
                 vmin=None,
                 vmax=None):
        """Show usage tips, plot ``image`` and attach a rectangle selector.

        Parameters:
            image: array passed to ``imshow`` and used to derive the default
                colour limits.
            fig_xsize, fig_ysize: figure width and height; applied only when
                both are given, otherwise the default figure size is used.
            cmap: colormap used to draw the image.
            vmin, vmax: colour limits. When ``None``, the 1st / 99th
                percentile of ``image`` (ignoring NaNs) is used.
        """
        blue_tips = (
            "<text style=color:blue><b>Area of Interest Selector Tips:\n</b></text>",
            '<text style=color:blue>- This plot uses "matplotlib notebook", whereas the other plots in this notebook use "matplotlib inline".</text>',
            '<text style=color:blue>-  If you run this cell out of sequence and the plot is not interactive, rerun the "%matplotlib notebook" code cell.</text>',
            '<text style=color:blue>- Use the pan tool to pan with the left mouse button.</text>',
            '<text style=color:blue>- Use the pan tool to zoom with the right mouse button.</text>',
            '<text style=color:blue>- You can also zoom with a selection box using the zoom to rectangle tool.</text>',
            '<text style=color:blue>- To turn off the pan or zoom to rectangle tool so you can select an AOI, click the selected tool button again.</text>',
        )
        red_warnings = (
            '<text style=color:darkred><b>IMPORTANT!</b></text>',
            '<text style=color:darkred>- Upon loading the AOI selector, the selection tool is already active.</text>',
            '<text style=color:darkred>- Click, drag, and release the left mouse button to select an area.</text>',
            '<text style=color:darkred>- The square tool icon in the menu is <b>NOT</b> the selection tool. It is the zoom tool.</text>',
            '<text style=color:darkred>- If you select any tool, you must toggle it off before you can select an AOI</text>',
        )
        # Each message is displayed separately, in the original order.
        for message in blue_tips + red_warnings:
            display(Markdown(message))

        self.image = image
        # Corners of the selected area; unset until a selection is made.
        self.x1 = None
        self.y1 = None
        self.x2 = None
        self.y2 = None
        # Compare against None so that an explicit limit of 0 is honoured
        # (a truthiness test would silently replace it by the percentile).
        if vmin is None:
            self.vmin = np.nanpercentile(self.image, 1)
        else:
            self.vmin = vmin
        if vmax is None:
            self.vmax = np.nanpercentile(self.image, 99)
        else:
            self.vmax = vmax
        if fig_xsize and fig_ysize:
            self.fig, self.current_ax = plt.subplots(figsize=(fig_xsize,
                                                              fig_ysize))
        else:
            self.fig, self.current_ax = plt.subplots()
        self.fig.suptitle('Area-Of-Interest Selector', fontsize=16)
        # Use the caller-supplied colormap; it was previously ignored in
        # favour of a hard-coded gist_gray.
        self.current_ax.imshow(self.image,
                               cmap=cmap,
                               vmin=self.vmin,
                               vmax=self.vmax)

        def toggle_selector(event):
            # Key-press callback: 'q' deactivates and 'a' reactivates the
            # selector. It is a plain closure that receives only the event,
            # so it must not declare a ``self`` parameter.
            print(' Key pressed.')
            if event.key in ['Q', 'q'] and toggle_selector.RS.active:
                print(' RectangleSelector deactivated.')
                toggle_selector.RS.set_active(False)
            if event.key in ['A', 'a'] and not toggle_selector.RS.active:
                print(' RectangleSelector activated.')
                toggle_selector.RS.set_active(True)

        # The selector is stored as an attribute of the callback function.
        # NOTE(review): `drawtype` and `rectprops` appear to be deprecated in
        # newer Matplotlib releases — confirm against the installed version.
        toggle_selector.RS = RectangleSelector(
            self.current_ax,
            self.line_select_callback,
            drawtype='box',
            useblit=True,
            button=[1, 3],  # don't use middle button
            minspanx=5,
            minspany=5,
            spancoords='pixels',
            rectprops=dict(facecolor='red',
                           edgecolor='yellow',
                           alpha=0.3,
                           fill=True),
            interactive=True)
        plt.connect('key_press_event', toggle_selector)
Exemple #19
0
 def displayMD(self, md):
     """Display ``md`` rendered as Markdown."""
     rendered = Markdown(md)
     display(rendered)
Exemple #20
0
def PrintModelSettings():
    """Show the formatted settings followed by the results save path.

    Each part is introduced by a level-4 Markdown heading; the values
    themselves are written with ``print``.
    """
    display(Markdown('#### Please check your settings:'))
    formatted_settings = GetFormattedSettings()
    print(formatted_settings)

    display(Markdown('#### Results to be saved at:'))
    target_path = savepath.value
    print(target_path)
Exemple #21
0
def generate_column_correlation_network(df,
                                        th=0.8,
                                        edge_labels_flag=True,
                                        layout="spring_layout"):
    """Plot a graph linking the numerical columns of ``df`` that correlate.

    An edge is added between two numerical columns when the absolute value of
    their Pearson correlation (rounded to 2 decimals) is at least ``th``; the
    rounded correlation is stored as the edge weight. When the graph has more
    than one edge, Girvan-Newman communities (second level) are printed and
    used to colour the nodes.

    Parameters:
        df: DataFrame whose numerical columns are compared pairwise.
        th: minimum absolute correlation for an edge.
        edge_labels_flag: draw the correlation values on the edges.
        layout: one of "spring_layout", "planar_layout", "circular_layout".

    Returns:
        None. The function returns early, after printing a message, when the
        graph is empty or ``layout`` is not recognised.
    """
    G = nx.Graph()
    col_numerical = list(df.select_dtypes([np.number]).columns)

    # Go over all column pairs and keep the strongly correlated ones.
    for col1, col2 in itertools.combinations(col_numerical, 2):
        corr = round(df[col1].corr(df[col2], method='pearson'), 2)
        if abs(corr) >= th:
            G.add_edge(col1, col2, weight=corr)

    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()
    if num_nodes <= 1 or num_edges == 0:
        print("netwotk has no nodes (or just 1) or edges")
        return

    # Calculate communities
    if num_edges > 1:
        communities_generator = community.girvan_newman(G)
        # Skip the first level; the second level is the one reported.
        next(communities_generator)
        next_level_communities = next(communities_generator)

        display(Markdown("## Communites:"))
        node_to_cc_dict = dict()
        cc = sorted(map(sorted, next_level_communities))
        if len(cc) == 0:
            print("netwotk has no communites")
            return

        # Number the communities from 1 and remember each node's community.
        for counter, c in enumerate(cc, start=1):
            for n in c:
                node_to_cc_dict[n] = counter
            print(c)
            print("-------------------")

    # Print network summary
    display(Markdown("## Network:"))
    print("number of nodes:", num_nodes)
    print("number of edges:", num_edges)

    # Resolve the layout before creating the figure. An unknown name used to
    # fall through and crash with a NameError on ``pos``.
    if layout == "spring_layout":
        pos = nx.spring_layout(G, k=0.15, iterations=20, scale=2)
    elif layout == "planar_layout":
        pos = nx.planar_layout(G)
    elif layout == "circular_layout":
        pos = nx.circular_layout(G)
    else:
        print("problrm with choosing a layout")
        return

    plt.figure(figsize=(26, 10))
    if num_edges > 1:
        # Colour each node by its community number.
        com_values = [node_to_cc_dict[n] for n in G.nodes()]
        nx.draw_networkx(G,
                         pos,
                         cmap=plt.get_cmap('jet'),
                         node_color=com_values,
                         with_labels=True,
                         alpha=0.6,
                         font_size=14)
    else:
        nx.draw_networkx(G,
                         pos,
                         cmap=plt.get_cmap('jet'),
                         with_labels=True,
                         alpha=0.6,
                         font_size=16)
    if edge_labels_flag:
        edge_labels = nx.get_edge_attributes(G, 'weight')
        nx.draw_networkx_edge_labels(G,
                                     pos,
                                     edge_labels=edge_labels,
                                     alpha=0.8)
    plt.show()
 def _printmd(self, string):
     """Display ``string`` rendered as Markdown."""
     rendered = Markdown(string)
     display(rendered)
Exemple #23
0
from pathlib import Path

from IPython.display import display, Markdown

# Package version as a tuple, and as the dotted string derived from it.
version_info = (0, 0, 1)
__version__ = '.'.join(map(str, version_info))

# Where the helper package lives: on GitHub and on the local filesystem
# (the directory containing this file).
github_url = "https://github.com/ContextLab/sherlock-topic-model-paper/tree/master/code/sherlock_helpers"
pkg_dir = Path(__file__).resolve().parent
# Notice pointing to the helper code and explaining how to view source
# from within a notebook.
message = Markdown(
    "Helper functions and variables used across multiple notebooks can be "
    f"found in `{pkg_dir}`, or on GitHub, [here]({github_url}).<br />You can "
    "also view source code directly from the notebook with:<br /><pre>    "
    "from sherlock_helpers.functions import show_source<br />    show_source(foo)"
    "</pre>")

try:
    # check whether package was imported from a notebook:
    # `get_ipython` is only defined when running under IPython, so a
    # NameError means a plain Python import and the notice is skipped.
    get_ipython()
    display(message)
except NameError:
    pass