Esempio n. 1
0
def gen_report_with_files(datadir: Path,
                          single_file: bool = False) -> dp.Report:
    """Build a sample report from markdown, file, plot and table blocks.

    Args:
        datadir: Directory that contains ``datapane-logo.png``.
        single_file: When true, return a report holding only the markdown and
            plot blocks wrapped in a single ``dp.Blocks`` container.

    Returns:
        The assembled ``dp.Report``.
    """
    numbers = [1, 2, 3]
    frame = gen_df(10000)

    # Every asset is created up-front, whichever layout ends up being returned.
    markdown = dp.Markdown(
        text="# Test markdown block <hello/> \n Test **content**")
    json_file = dp.File(data=numbers, name="List Asset", is_json=True)
    logo = dp.File(file=datadir / "datapane-logo.png")

    line_chart = alt.Chart(gen_df()).mark_line().encode(x="x", y="y")
    plot = dp.Plot(data=line_chart, caption="Plot Asset")

    table = dp.Table(df=frame, caption="Test Dataframe Table")
    pivot_table = dp.Table(df=frame,
                           caption="Test Dataframe PivotTable",
                           can_pivot=True)

    if not single_file:
        return dp.Report(json_file, logo, table, markdown, plot, pivot_table)
    return dp.Report(dp.Blocks([markdown, plot]))
Esempio n. 2
0
def test_report(tmp_path: Path):
    """Publish a six-block report and check its headline and block count."""
    frame = gen_df()
    report_name = gen_name()
    report_headline = gen_headline()

    greeting = dp.Markdown("hello world!!")

    # The same list is supplied three ways: as a file on disk, as a JSON
    # string and as a plain Python object.
    numbers = [1, 2, 3]
    numbers_json: str = json.dumps(numbers)
    chart = alt.Chart(frame).mark_line().encode(x="x", y="y")

    json_path = tmp_path / "json_list.json"
    json_path.write_text(data=numbers_json)

    from_path = dp.File(file=json_path)
    from_json_text = dp.File(data=numbers_json, is_json=True)
    plot_block = dp.Plot(data=chart)
    from_list = dp.File(data=numbers, is_json=True)
    table_block = dp.Table(df=frame, caption="Our Dataframe")

    published = api.Report(greeting, from_path, table_block, from_json_text,
                           plot_block, from_list)
    published.publish(name=report_name, headline=report_headline)

    with deletable(published):
        assert published.headline == report_headline
        top_level_blocks = published.top_block.blocks
        assert len(top_level_blocks) == 6
Esempio n. 3
0
def test_full_report(tmp_path: Path):
    """Upload a six-block report and verify its metadata and block count."""
    frame = gen_df()
    report_name = gen_name()
    report_description = gen_description()
    repo_url = "https://github.com/datapane/datapane"

    greeting = dp.Text("hello world!!")

    # The same list is supplied three ways: as a file on disk, as a JSON
    # string and as a plain Python object.
    numbers = [1, 2, 3]
    numbers_json: str = json.dumps(numbers)
    chart = gen_plot()

    json_path = tmp_path / "json_list.json"
    json_path.write_text(data=numbers_json)

    from_path = dp.File(file=json_path)
    from_json_text = dp.File(data=numbers_json, is_json=True)
    plot_block = dp.Plot(data=chart)
    from_list = dp.File(data=numbers, is_json=True)
    table_block = dp.DataTable(df=frame, caption="Our Dataframe")

    uploaded = dp.Report(greeting, from_path, table_block, from_json_text,
                         plot_block, from_list)
    uploaded.upload(name=report_name,
                    description=report_description,
                    source_url=repo_url)

    with deletable(uploaded):
        check_name(uploaded, report_name)
        assert uploaded.description == report_description
        assert uploaded.source_url == repo_url
        first_container = uploaded.pages[0].blocks[0]
        assert len(first_container.blocks) == 6
def LoadReport(title, X, Y, prediccion, anios):
    """Utility that builds the Datapane report for a prediction run.

    The report holds a one-row table with the predicted values followed by
    the MAPE, MAE and RMSE metrics, and a line chart with point markers that
    compares the series.

    Args:
        title (str): title set on the chart.
        X (:obj: `numpy.array`): real values plotted as the
            'Datos de entrenamiento' series, starting at year 1998.
        Y (:obj: `numpy.array`): real values plotted as the
            'Datos de prueba' series.
        prediccion (:obj: `numpy.array`): predicted values.
        anios (int): number of predicted years.

    Returns:
        (:obj: `datapane.Report`): report ready to be published or saved.
    """
    n_train = len(X)

    # The test and predicted series both start on the last training year, so
    # each one spans len(Y) + 1 years.
    first_tail_year = 1997 + n_train
    tail_years = list(range(first_tail_year, first_tail_year + len(Y) + 1))
    n_tail = len(tail_years)
    years = list(range(1998, 1998 + n_train)) + tail_years + tail_years

    labels = (['Datos de entrenamiento'] * n_train +
              ['Datos de prueba'] * n_tail +
              ['Datos predichos'] * n_tail)

    # Each tail series is the last (anios + 1) points of the training values
    # followed by the real / predicted values.
    train_values = list(X)
    window = anios + 1
    students = (train_values +
                (train_values + list(Y))[-window:] +
                (train_values + list(prediccion))[-window:])

    df = pd.DataFrame(data={
        'Serie': labels,
        'Año': years,
        'Alumnos': students
    })

    columns = ['Año %d' % (i) for i in range(1, window)]
    columns.extend(['MAPE', 'MAE', 'RMSE'])

    error = prediccion - Y
    scores = np.array(
        [
            np.abs(error / Y).mean(),  # MAPE
            np.abs(error).mean(),  # MAE
            np.sqrt(np.square(error).mean()),  # RMSE
        ],
        dtype=np.float64,
    )
    metrics_row = list(prediccion) + list(scores)
    metrics_table = pd.DataFrame(np.array([metrics_row]), columns=columns)

    # Build the chart
    encoded = alt.Chart(df).mark_line().encode(x='Año',
                                               y='Alumnos',
                                               color='Serie')
    chart = encoded.mark_line(point=True).interactive().properties(
        title=title)

    # Build the report
    return dp.Report(dp.Table(metrics_table), dp.Plot(chart))
Esempio n. 5
0
    def publish(
        self,
        name="Folium Map",
        description="",
        source_url="",
        visibility="DEFAULT",
        open=True,
        tags=None,
        **kwargs,
    ):
        """Publish the map to datapane.com

        Args:
            name (str, optional): The document name - can include spaces, caps, symbols, etc., e.g. "Profit & Loss 2020". Defaults to "Folium Map".
            description (str, optional): A high-level description for the document, this is displayed in searches and thumbnails. Defaults to ''.
            source_url (str, optional): A URL pointing to the source code for the document, e.g. a GitHub repo or a Colab notebook. Defaults to ''.
            visibility (str, optional): Visibility of the map (case-insensitive). It can be one of the following: PORTFOLIO, PRIVATE, DEFAULT. Defaults to 'DEFAULT'.
            open (bool, optional): Whether to open the map. Defaults to True.
            tags (list, optional): A list of tags (as strings) used to categorise your document. Defaults to None.
            **kwargs: Accepted but not used by this method.

        Raises:
            ImportError: If the datapane package cannot be imported.
            ValueError: If visibility is not one of DEFAULT, PRIVATE, PORTFOLIO.
        """
        import webbrowser

        try:
            import datapane as dp
        except Exception as err:
            # Point the user at the setup guide before reporting the failure.
            webbrowser.open_new_tab(
                "https://docs.datapane.com/tut-getting-started")
            raise ImportError(
                "The datapane Python package is not installed. You need to install and authenticate datapane first."
            ) from err

        visibility = visibility.upper()
        if visibility not in ("DEFAULT", "PRIVATE", "PORTFOLIO"):
            raise ValueError(
                "The visibility argument must be either DEFAULT or PRIVATE or PORTFOLIO."
            )

        # Errors are intentionally not re-wrapped in a bare Exception: callers
        # get the original exception type (still an Exception subclass) and
        # its traceback.
        dp.Report(dp.Plot(self)).upload(
            name=name,
            description=description,
            source_url=source_url,
            # The validated name is also the attribute name on dp.Visibility.
            visibility=getattr(dp.Visibility, visibility),
            open=open,
            tags=tags,
        )
Esempio n. 6
0
def run(db_url):
    """Build the monitoring report from the database and publish it.

    Args:
        db_url: Connection URL handed to ``Engine``.
    """
    engine = Engine(db_url)

    posts_per_day_chart = process_posts_per_day(engine)
    updated_at = get_current_time_in_vietname()
    posts_per_day_caption = f"Number of posts updated per day (updated {updated_at})"
    posts_per_domain = process_posts_per_domain(engine)
    similarity_table, similarity_chart = process_sim_score_table(engine)

    blocks = [
        dp.Plot(posts_per_day_chart, caption=posts_per_day_caption),
        dp.DataTable(posts_per_domain, caption="Number of posts per domain"),
        dp.DataTable(similarity_table,
                     caption="Simplified similar_docs table"),
        dp.Plot(similarity_chart,
                caption="Similarity score histogram (>= 0.5)"),
    ]
    dp.Report(*blocks).publish(name="TopDup monitoring table", open=False)

    report_url = "https://datapane.com/u/tiepvupsu/reports/topdup-monitoring-table/"
    print("View report at " + report_url)
Esempio n. 7
0
def test_text_report_files():
    """Upload text reports built from positional and from keyword blocks."""
    small_frame = gen_df()
    big_frame = gen_df(1000)
    plot_block = dp.Plot(data=gen_plot(), caption="Plot Asset")

    positional = dp.TextReport(small_frame, big_frame, plot_block)
    first_upload = positional.upload(name="Test")
    with deletable(first_upload):
        _calc_text_blocks(first_upload, 3)

        # The second upload reuses the name "Test" and happens while the
        # first report is still inside its deletable() context.
        keyword = dp.TextReport(text="Text",
                                s_df=small_frame,
                                b_df=big_frame,
                                plot=plot_block)
        second_upload = keyword.upload(name="Test")
        _calc_text_blocks(second_upload, 4)
Esempio n. 8
0
def gen_report_complex_with_files(datadir: Path,
                                  single_file: bool = False,
                                  local_report: bool = False) -> dp.Report:
    """Build a sample report that exercises text, asset and table blocks.

    Args:
        datadir: Directory that contains ``datapane-logo.png``.
        single_file: When true, return a report with a single group holding
            the markdown block and the data table.
        local_report: When true, the basic table is used in place of the
            ``dp.DataTable`` block.

    Returns:
        A single-group report when ``single_file`` is true, otherwise a
        two-page report (text blocks and big numbers, then assets and tables).
    """
    # Asset tests
    lis = [1, 2, 3]
    small_df = gen_df()
    big_df = gen_df(10000)

    # text
    # md_block was referenced below without ever being defined, which raised
    # NameError on both return paths.
    md_block = dp.Text(
        text="# Test markdown block </hello> \n Test **content**")
    html_block = dp.HTML(html="<h1>Hello World</h1>")
    html_block_1 = dp.HTML(html=h2("Hello World"))
    code_block = dp.Code(code="print('hello')", language="python")
    formula_block = dp.Formula(formula=r"\frac{1}{\sqrt{x^2 + 1}}")
    big_number = dp.BigNumber(heading="Tests written", value=1234)
    big_number_1 = dp.BigNumber(heading="Real Tests written :)",
                                value=11,
                                change=2,
                                is_upward_change=True)
    embed_block = dp.Embed(url="https://www.youtube.com/watch?v=JDe14ulcfLA")

    # assets
    plot_asset = dp.Plot(data=gen_plot(), caption="Plot Asset")
    list_asset = dp.File(data=lis, filename="List Asset", is_json=True)
    img_asset = dp.File(file=datadir / "datapane-logo.png")

    # tables
    table_asset = dp.Table(data=small_df, caption="Test Basic Table")
    # local reports don't support DataTable
    dt_asset = table_asset if local_report else dp.DataTable(
        df=big_df, caption="Test DataTable")

    if single_file:
        return dp.Report(dp.Group(blocks=[md_block, dt_asset]))
    else:
        return dp.Report(
            dp.Page(
                dp.Select(md_block,
                          html_block,
                          html_block_1,
                          code_block,
                          formula_block,
                          embed_block,
                          type=dp.SelectType.TABS),
                dp.Group(big_number, big_number_1, columns=2),
            ),
            dp.Page(
                plot_asset,
                list_asset,
                img_asset,
                table_asset,
                dt_asset,
            ),
        )
Esempio n. 9
0
def publish_report(df: pandas.DataFrame):
    """Log in to Datapane and save a chart and data table of ``df`` locally.

    Args:
        df: Frame with the ``date``, ``continent`` and
            ``new_cases_smoothed_per_million`` columns used by the chart.
    """
    dp.login(token=db["DP_TOKEN"])

    # Semi-transparent, unstacked area per continent.
    area = alt.Chart(df).mark_area(opacity=0.4, stroke='black')
    y_axis = alt.Y('new_cases_smoothed_per_million:Q', stack=None)
    continent_color = alt.Color('continent:N',
                                scale=alt.Scale(scheme='set1'))
    encoded = area.encode(x='date:T',
                          y=y_axis,
                          color=continent_color,
                          tooltip='continent:N')
    chart = encoded.interactive().properties(width='container')

    report = dp.Report(dp.Plot(chart), dp.DataTable(df))
    report.save(path='report.html', open=True)
def plot_planning(planning, need, timeline):
    """Chart the requirement and the optimized schedule, then publish them.

    Two faceted bar charts (one column per date) are stacked vertically,
    saved to ``planning_time_model2.html`` and published as a Datapane
    report.

    Args:
        planning: DataFrame whose row labels containing "Total hours" are
            charted; it must have a "Solution" column.
            NOTE(review): the labels are presumably of the form
            "...[<date>,<line>]" - confirm against the caller.
        need: DataFrame whose column ``0`` holds the required hours, indexed
            by date.
        timeline: Sequence of periods; only its length is used, to size each
            chart facet.
    """
    # Plot graph - Requirement
    source = need.copy()
    source = source.rename(columns={0: "Hours"})
    source["Date"] = source.index

    bars_need = (alt.Chart(source).mark_bar().encode(
        y="Hours:Q",
        column=alt.Column("Date:N"),
        tooltip=["Date", "Hours"],
    ).interactive().properties(
        width=550 / len(timeline) - 22,
        height=75,
        title='Requirement',
    ))

    # Plot graph - Optimized planning
    source = planning.filter(like="Total hours", axis=0).copy()
    source["Date"] = list(source.index.values)
    source = source.rename(columns={"Solution": "Hours"}).reset_index()
    # Split the row label into the part before the comma (date) and after it
    # (line), then strip everything up to "[" and from "]" onwards.
    source[["Date", "Line"]] = source["Date"].str.split(",", expand=True)
    source["Date"] = source["Date"].str.split("[").str[1]
    source["Line"] = source["Line"].str.split("]").str[0]
    source["Min capacity"] = 7
    source["Max capacity"] = 12
    source = source.round({"Hours": 1})
    # Load is expressed as a percentage of 8 hours.
    source["Load%"] = pd.Series(
        ["{0:.0f}%".format(val / 8 * 100) for val in source["Hours"]],
        index=source.index,
    )

    bars = (alt.Chart(source).mark_bar().encode(
        x="Line:N",
        y="Hours:Q",
        column=alt.Column("Date:N"),
        color="Line:N",
        tooltip=["Date", "Line", "Hours", "Load%"],
    ).interactive().properties(
        width=550 / len(timeline) - 22,
        height=150,
        title="Optimized Production Schedule",
    ))

    chart = alt.vconcat(bars, bars_need)
    chart.save("planning_time_model2.html")

    # The keyword was misspelled "visibily", so the requested visibility was
    # never passed under the right name.
    dp.Report(dp.Plot(
        chart, caption="Production schedule model 2 - Time")).publish(
            name="Optimized production schedule model 2 - Time",
            description="Optimized production schedule model 2 - Time",
            open=True,
            visibility="PUBLIC",
        )
Esempio n. 11
0
    def publish(
        self,
        name=None,
        headline="Untitled",
        visibility="PUBLIC",
        overwrite=True,
        open=True,
    ):
        """Publish the map to datapane.com

        Failures while publishing are printed rather than raised.

        Args:
            name (str, optional): The URL of the map. A random "folium_" name is generated when None. Defaults to None.
            headline (str, optional): Title of the map. Defaults to 'Untitled'.
            visibility (str, optional): Visibility of the map. It can be one of the following: PUBLIC, PRIVATE, ORG; any other value falls back to PRIVATE. Defaults to 'PUBLIC'.
            overwrite (bool, optional): Whether to overwrite the existing map with the same name. Defaults to True.
            open (bool, optional): Whether to open the map. Defaults to True.

        Raises:
            ImportError: If the datapane package cannot be imported.
        """
        import webbrowser

        try:
            import datapane as dp
        except Exception:
            webbrowser.open_new_tab(
                "https://docs.datapane.com/tutorials/tut-getting-started")
            raise ImportError(
                "The datapane Python package is not installed. You need to install and authenticate datapane first."
            )

        try:
            report_name = ("folium_" + random_string(6)) if name is None else name

            level = visibility.upper()
            if level not in ("PUBLIC", "PRIVATE", "ORG"):
                level = "PRIVATE"

            if overwrite:
                # Remove any existing report with this name first.
                delete_dp_report(report_name)

            dp.Report(dp.Plot(self)).publish(name=report_name,
                                             headline=headline,
                                             visibility=level,
                                             open=open)
        except Exception as err:
            print(err)
Esempio n. 12
0
def test_markdown_format(datadir: Path):
    """Check ``dp.Text(...).format`` for an inline template and a file."""
    template = """
# My wicked markdown

{{plot}}

As above we do ...

{{select}}

Here's the dataset used...

{{}}
"""
    expected_tags = ["Text", "Plot", "Text", "Select", "Text", "Table"]

    table = gen_df()
    line_chart = alt.Chart(gen_df()).mark_line().encode(x="x", y="y")
    plot = dp.Plot(data=line_chart, caption="Plot Asset")
    select = dp.Select(dp.Text("Hello"), "World")

    # missing context: the keyword is "select1" while the template uses
    # "select"
    with pytest.raises(DPError):
        dp.Text(template).format(table, plot=plot, select1=select)
    # missing context: no positional block is passed for "{{}}"
    with pytest.raises(DPError):
        dp.Text(template).format(plot=plot, select=select)

    # test string
    from_string = dp.Text(template).format(table, plot=plot, select=select)
    assert isinstance(from_string, dp.Group)
    assert glom(from_string, ("blocks", ["_tag"])) == expected_tags

    # test file
    markdown_path = datadir / "report.md"
    from_file = dp.Text(file=markdown_path).format(table,
                                                   plot=plot,
                                                   select=select)
    assert isinstance(from_file, dp.Group)
    assert glom(from_file, ("blocks", ["_tag"])) == expected_tags
    assert "file-input" in element_to_str(from_file)
    assert "file-input" in glom(from_file, "blocks.0.content")
Esempio n. 13
0
    def publish(
        self,
        name="Folium Map",
        description="",
        source_url="",
        tags=None,
        source_file=None,
        open=True,
        formatting=None,
        **kwargs,
    ):
        """Publish the map to datapane.com

        Args:
            name (str, optional): The document name - can include spaces, caps, symbols, etc., e.g. "Profit & Loss 2020". Defaults to "Folium Map".
            description (str, optional): A high-level description for the document, this is displayed in searches and thumbnails. Defaults to ''.
            source_url (str, optional): A URL pointing to the source code for the document, e.g. a GitHub repo or a Colab notebook. Defaults to ''.
            tags (list, optional): A list of tags (as strings) used to categorise your document. Defaults to None.
            source_file (str, optional): Path of jupyter notebook file to upload. Defaults to None.
            open (bool, optional): Whether to open the map. Defaults to True.
            formatting (ReportFormatting, optional): Set the basic styling for your report.
            **kwargs: Accepted but not used by this method.

        Raises:
            ImportError: If the datapane package cannot be imported.
        """
        import webbrowser

        try:
            import datapane as dp
        except Exception as err:
            # Point the user at the setup guide before reporting the failure.
            webbrowser.open_new_tab("https://docs.datapane.com/tut-getting-started")
            raise ImportError(
                "The datapane Python package is not installed. You need to install and authenticate datapane first."
            ) from err

        # Errors are intentionally not re-wrapped in a bare Exception: callers
        # get the original exception type (still an Exception subclass) and
        # its traceback.
        dp.Report(dp.Plot(self)).upload(
            name=name,
            description=description,
            source_url=source_url,
            tags=tags,
            source_file=source_file,
            open=open,
            formatting=formatting,
        )
Esempio n. 14
0
def gen_report_with_files(datadir: Path,
                          single_file: bool = False) -> dp.Report:
    """Build a sample report from text, number, asset and table blocks.

    Args:
        datadir: Directory that contains ``datapane-logo.png``.
        single_file: When true, return a report holding only the markdown and
            plot blocks wrapped in a single ``dp.Blocks`` container.

    Returns:
        The assembled ``dp.Report``.
    """
    numbers = [1, 2, 3]
    small_frame = gen_df()
    big_frame = gen_df(10000)

    # Text-like blocks
    markdown = dp.Markdown(
        text="# Test markdown block </hello> \n Test **content**")
    html = dp.HTML(html="Hello World</hello>")
    tests_written = dp.BigNumber(heading="Tests written", value=1234)
    real_tests_written = dp.BigNumber(heading="Real Tests written :)",
                                      value=11,
                                      change=2,
                                      is_upward_change=True)

    # Plot and file assets
    line_chart = alt.Chart(gen_df()).mark_line().encode(x="x", y="y")
    plot = dp.Plot(data=line_chart, caption="Plot Asset")
    json_file = dp.File(data=numbers, name="List Asset", is_json=True)
    logo = dp.File(file=datadir / "datapane-logo.png")

    # Tables
    basic_table = dp.Table(data=small_frame, caption="Test Basic Table")
    data_table = dp.DataTable(df=big_frame, caption="Test DataTable")
    pivot_table = dp.DataTable(df=big_frame,
                               caption="Test DataTable with Pivot",
                               can_pivot=True)

    if not single_file:
        all_blocks = (
            markdown,
            html,
            tests_written,
            real_tests_written,
            plot,
            json_file,
            logo,
            basic_table,
            data_table,
            pivot_table,
        )
        return dp.Report(*all_blocks)
    return dp.Report(dp.Blocks(blocks=[markdown, plot]))
def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # NOTE(review): unpickling can run arbitrary code; only load trusted files.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


author_rank = dp.Blob.get('author_rank', owner='khuyentran1401').download_obj()
author_count = _load_pickle('./author_count')
publication_count = _load_pickle('./publication_count')
publication_rank = _load_pickle('./publication_rank')

# size of the data
data_size = len(author_rank)

# load parameters from input
dp.Params.load_defaults('datapane.yaml')
author = dp.Params.get('author_name', 'Khuyen Tran')
publication = dp.Params.get('publication_name')

# 1-based positions, looked up once and reused for the text and percentiles
author_position = list(author_rank).index(author) + 1
publication_position = list(publication_rank).index(publication) + 1
# The publication rank is out of the publication list, not the author list.
publication_total = len(publication_rank)

percentile_author = 100 - author_position / len(author_rank) * 100
percentile_publications = 100 - publication_position / publication_total * 100

dp.Report(
    dp.Markdown("# Medium Visualization"),
    dp.Plot(author_count),
    dp.Markdown(f'''
   Author {author} ranks {author_position} out of {data_size} authors who publish most frequently on topics related to data science last year, which is
    in the {str(round(percentile_author,2))}% percentile
   '''),
    dp.Plot(publication_count),
    dp.Markdown(
        f'''Publication {publication} ranks {publication_position} out of {publication_total} publications which publish most frequently on topics related to data science last year, which is
    in the {str(round(percentile_publications,2))}% percentile'''),
).publish(headline='Medium Visualization', name='medium_report')
Esempio n. 16
0
"""My cool plot"""
import pandas as pd
import altair as alt
import datapane as dp

# Download the GOOG price history as a DataFrame.
df = pd.read_csv(
    "https://query1.finance.yahoo.com/v7/finance/download/GOOG?period2=1585222905&interval=1mo&events=history"
)

# One bar per date spanning Open..Close; the colour depends on whether
# Open <= Close for that row.
chart = (alt.Chart(df).encode(
    x="Date:T",
    y="Open",
    y2="Close",
    color=alt.condition("datum.Open <= datum.Close", alt.value("#06982d"),
                        alt.value("#ae1325")),
).mark_bar().interactive())

# The report is exposed as a plain list of blocks.
report = [dp.Plot(chart)]
Esempio n. 17
0
import numpy as np


def gen_df(dim: int = 4) -> pd.DataFrame:
    """Return a ``dim``-row frame whose ``x`` and ``y`` columns both count from 0."""
    values = list(range(dim))
    return pd.DataFrame.from_dict({"x": values, "y": values})


lis = [1, 2, 3]
df = gen_df(10000)

# Bokeh
p = figure(title="simple line example", x_axis_label='x', y_axis_label='y')
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], legend_label="Temp.", line_width=2)
bokeh_asset = dp.Plot(data=p)

# Folium
m = folium.Map(
    location=[45.372, -121.6972],
    zoom_start=12,
    tiles='Stamen Terrain'
)
folium.Marker(
    location=[45.3288, -121.6625],
    popup='Mt. Hood Meadows',
    icon=folium.Icon(icon='cloud')
).add_to(m)
folium.Marker(
    location=[45.3311, -121.7113],
    popup='Timberline Lodge',
Esempio n. 18
0
"""{{ name }} report"""
import altair as alt
import pandas as pd
import datapane as dp

# get the data: mean of new_cases_smoothed_per_million for each
# (continent, date) pair, flattened back into ordinary columns
dataset = pd.read_csv(
    "https://covid.ourworldindata.org/data/owid-covid-data.csv")
df = dataset.groupby(
    ["continent",
     "date"])["new_cases_smoothed_per_million"].mean().reset_index()

# build an altair plot: one semi-transparent, unstacked area per continent
plot = alt.Chart(df).mark_area(opacity=0.4, stroke='black').encode(
    x='date:T',
    y=alt.Y('new_cases_smoothed_per_million:Q', stack=None),
    color=alt.Color('continent:N', scale=alt.Scale(scheme='set1')),
    tooltip='continent:N').interactive().properties(width='container')

# embed data and plot into a Datapane report and publish
# ("{{ name }}" is a template placeholder left in the report name)
report = dp.Report("## Covid data per continent", dp.Plot(plot), dp.Table(df))
report.publish(
    name="Covid Demo {{ name }}",
    description=
    "Plot of Covid infections per continent, using data from ourworldindata",
    open=True,
)
Esempio n. 19
0
import datapane as dp
from datapane.client.api.templates import gen_df, gen_plot
from bokeh.plotting import figure
from pathlib import Path
import folium
import plotly.graph_objects as go
from matplotlib.collections import EventCollection
import matplotlib.pyplot as plt
import numpy as np

# Sample list; not used by the statements shown here.
lis = [1, 2, 3]

# Bokeh: a single line plot wrapped as a Datapane plot block
p = figure(title="simple line example", x_axis_label='x', y_axis_label='y')
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], legend_label="Temp.", line_width=2)
bokeh_asset = dp.Plot(data=p)

# Folium: a map with three markers wrapped as a Datapane plot block
m = folium.Map(location=[45.372, -121.6972],
               zoom_start=12,
               tiles='Stamen Terrain')
folium.Marker(location=[45.3288, -121.6625],
              popup='Mt. Hood Meadows',
              icon=folium.Icon(icon='cloud')).add_to(m)
folium.Marker(location=[45.3311, -121.7113],
              popup='Timberline Lodge',
              icon=folium.Icon(color='green')).add_to(m)
folium.Marker(location=[45.3300, -121.6823],
              popup='Some Other Location',
              icon=folium.Icon(color='red', icon='info-sign')).add_to(m)
folium_asset = dp.Plot(data=m)
Esempio n. 20
0
>*Here, we can look at the contexts in which different particles appear most frequently, and see how these contexts compare to conventional wisdom/rules about how the particles are used*

### は & が
#### Common Heuristics
+ One common way to differentiate は and が is that は marks the *topic* of a sentence, where が marks the grammatical *subject* in a sentence. 
+ In this sense, は can lend more emphasis to the subject it marks as compared to が
+ が tends to be used more in noun and subordinate clauses, and if は is used in these contexts, it tends to be for emphasis

#### Insights
+ **Token**
    * は and が are used in much the same contexts, with the main area of divergence occurring in the detailed part of speech
    * は functions exclusively as a linking particle, while が functions mostly as a case marking particle
+ **Syntactic Head**
    * Whereas が occurs largely with subjects of sentences, は is more evenly split between subjects and indirect objects
    * This trend shows across dependencies and part of speech distributions, with は appearing in a wider range of contexts
                 """), dp.Plot(div_six_はが),
    dp.Markdown("""
### と & も
#### Common Heuristics
+ と and も are both used to talk about more than one thing in a sentence
+ と is used to list things (very similar to "and" in English)
+ も takes a previous statement or context and applies it to a new subject (similar to "as well" or "also")
+ も can replace subject/topic markers は and が

#### Insights
+ **Token**
    * と and も occur almost entirely in the same contexts when looking at dependencies
    * When looking at the tags, と functions as a case marker, while も functions as a linking particle
+ **Syntactic Head**
    * The largest difference between と and も syntactic heads occurs in the dependencies, with と occurring alongside nominal and adverbial clause modifiers, and も occurring with oblique nominals and indirect objects
Esempio n. 21
0
'''
DATAPANE INTERACTIVE REPORT

pip install datapane

# https://datapane.com/reports/?name=&owned_by_me=on&order=
# Remember to login
# https://towardsdatascience.com/introduction-to-datapane-a-python-library-to-build-interactive-reports-4593fd3cb9c8
'''

import pandas as pd
import altair as alt
import datapane as dp

# Download the GOOG price history as a DataFrame.
df = pd.read_csv(
    'https://query1.finance.yahoo.com/v7/finance/download/GOOG?period2=1585222905&interval=1mo&events=history'
)

# Interactive line chart of the Open column over Date.
chart = alt.Chart(df).encode(x='Date:T', y='Open').mark_line().interactive()

# Once you have the df and the chart, simply use
r = dp.Report(
    dp.Markdown('My simple report'),  # add a description to the report
    dp.Table(df),  # create a table
    dp.Plot(chart)  # create a chart
)

# Publish your report. Make sure to have visibility='PUBLIC' if you want to share your report
r.publish(name='stock_report', visibility='PUBLIC')
Esempio n. 22
0
import pandas as pd
import altair as alt
import datapane as dp
# Load the dataset and average new_cases_smoothed_per_million for each
# (continent, date) pair.
dataset = pd.read_csv(
    'https://covid.ourworldindata.org/data/owid-covid-data.csv')
df = dataset.groupby(
    ['continent',
     'date'])['new_cases_smoothed_per_million'].mean().reset_index()
# One semi-transparent, unstacked area per continent.
plot = alt.Chart(df).mark_area(opacity=0.4, stroke='black').encode(
    x='date:T',
    y=alt.Y('new_cases_smoothed_per_million:Q', stack=None),
    color=alt.Color('continent:N', scale=alt.Scale(scheme='set1')),
    tooltip='continent:N').interactive().properties(width='container')
# Publish the plot together with the aggregated table.
dp.Report(dp.Plot(plot), dp.Table(df)).publish(name='covid_report', open=True)
Esempio n. 23
0
    'https://covid.ourworldindata.org/data/vaccinations/vaccinations-by-manufacturer.csv',
    parse_dates=['date'])
df = df.groupby(['vaccine',
                 'date'])['total_vaccinations'].sum().tail(1000).reset_index()

# plot vaccinations over time using Altair
plot = alt.Chart(df).mark_area(opacity=0.4, stroke='black').encode(
    x='date:T',
    y=alt.Y('total_vaccinations:Q'),
    color=alt.Color('vaccine:N', scale=alt.Scale(scheme='set1')),
    tooltip='vaccine:N').interactive().properties(width='container')

# tabulate total vaccinations by manufacturer
total_df = df[df["date"] == df["date"].max()].sort_values(
    "total_vaccinations", ascending=False).reset_index(drop=True)
total_styled = total_df.style.bar(subset=["total_vaccinations"],
                                  color='#5fba7d',
                                  vmax=total_df["total_vaccinations"].sum())

# embed into a Datapane Report
report = dp.Report(
    "## Vaccination Report",
    dp.Plot(plot, caption="Vaccinations by manufacturer over time"),
    dp.Table(total_styled,
             caption="Current vaccination totals by manufacturer"),
    dp.Table(df, caption="Initial Dataset"))
report.upload(
    name='Covid Vaccinations Demo {{ name }}',
    description="Covid Vaccinations report, using data from ourworldindata",
    open=True)
# Run in a shell first (or prefix with "!" in a notebook): pip install datapane


# In[2]:


import pandas as pd
import altair as alt
import datapane as dp


# In[3]:


# Download the GOOG price history as a DataFrame.
df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/GOOG?period2=1585222905&interval=1mo&events=history')

# Interactive line chart of the Open column over Date.
chart = alt.Chart(df).encode(
    x='Date:T',
    y='Open'
).mark_line().interactive()

# Save the table and chart to report.html instead of publishing them.
r = dp.Report(dp.Table(df), dp.Plot(chart))
r.save(path='report.html', open=True)


# In[ ]:




Esempio n. 25
0
# Keep only the three columns used by the scatterplot matrix and add a
# running row number that is passed as its index column.
# NOTE(review): assigning a column to a column-subset of a frame can trigger
# pandas' SettingWithCopyWarning - confirm how nflx is created upstream.
nflx = nflx[['Open', 'Close', 'Volume']]
nflx["index"] = np.arange(len(nflx))

fig8 = go.Figure(
    ff.create_scatterplotmatrix(nflx,
                                diag='box',
                                index='index',
                                size=3,
                                height=600,
                                width=1150,
                                colormap='RdBu',
                                title={
                                    'text':
                                    "Netflix Stock Price (Scatterplot Matrix)",
                                    'x': 0.5,
                                    'xanchor': 'center'
                                }))

# fig0..fig7 are defined outside this excerpt; they are grouped with
# columns=2 and rows=4, followed by the scatterplot matrix on its own.
dp.Report(
    dp.Group(dp.Plot(fig0),
             dp.Plot(fig1),
             dp.Plot(fig2),
             dp.Plot(fig3),
             dp.Plot(fig4),
             dp.Plot(fig5),
             dp.Plot(fig6),
             dp.Plot(fig7),
             columns=2,
             rows=4), dp.Plot(fig8)).publish(name='stock_report', open=True)
Esempio n. 26
0
                         name='PDF',
                         line=dict(color='#ffc93c', width=2.5)),
              row=1,
              col=1)

fig.add_trace(go.Scatter(x=values,
                         y=cdfproba,
                         name='CDF',
                         line=dict(color='#ffc93c', width=2.5)),
              row=1,
              col=2)
fig.update_layout(plot_bgcolor='#0f4c75')
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

report = dp.Report(dp.Plot(fig))  #Create a report
report.publish(name='my_plot', open=True,
               visibility='PUBLIC')  #Publish the report

# In[2]:

import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from scipy.stats import norm

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

values = np.linspace(-3, 3, num=101)
Esempio n. 27
0
# %% Doc setup
import datapane as dp
import sidetable as stb

from geo import m
from profiles import Arashiyama_df, Chayama_df, Demachiyanagi_df, Hanazono_df, Kokusaikaikan_df, izakaya_df, restaurants, station_groupby_df

# %% Report

rprt = dp.Report(
    dp.Text("""
# 京都, in Stations and Their Restaurants
### Exploration of restaurants in Kyoto and the stations they're closest to
Splitting up Kyoto restaurants by regions centered on train/subway stations, as well as checking out some interesting characteristics of the different restaurants near each station. As it turns out, there's a ton of izakaya in the city
---
"""), dp.Plot(m),
    dp.Text("""
## Dataset
The dataset used for the current project can be found [here](https://www.kaggle.com/koki25ando/tabelog-restaurant-review-dataset). Station data was pulled manually from wikipedia and google maps.
"""), dp.DataTable(restaurants),
    dp.Text("""
## Some Special Station Profiles
*Taking a look at some stations which have interesting or peculiar restaurant selection*
"""), dp.DataTable(station_groupby_df),
    dp.Text("""
### Arashiyama
#### Lots (relatively, at least) of tofu options, beware of expensive lunch prices
		"""), dp.DataTable(Arashiyama_df),
    dp.Text("""
### Demachiyanagi
#### A station whose food options are split between expensive kaiseki and cheap izakaya fare
Esempio n. 28
0
    # ax.set_color(random_color)
    return ax


# Tickers included in the intraday report.
stock_list = [
    "NTAR.CN", "IDK.CN", "ART.V", "PKK.CN", "APHA.TO", "CMC.CN", "AMPD.CN",
    "MTRX.V"
]

# Take a single "now" snapshot so both ends of the date window are derived
# from the same instant; two separate today() calls could straddle midnight.
_now = datetime.today()
curr_date = _now.strftime('%Y-%m-%d')
start_date = (_now - timedelta(days=59)).strftime('%Y-%m-%d')

df_assets = generate_up(stock_list, start_date, curr_date)

# One plot block per ticker over the same date window.
figure_list = [
    dp.Plot(intraday_plot(stock, start_date, curr_date))
    for stock in stock_list
]

# Publishing is enabled only when DP_TOKEN is set to a non-empty value.
# NOTE(review): nothing in this section logs in with the token; it is only
# used as an on/off flag here — confirm a login happens elsewhere before
# the report is published.
dp_token = os.getenv('DP_TOKEN')
publish_report = bool(dp_token)

# Report layout: heading, summary table, then the plots in two columns.
r = dp.Report(f'### Intraday Report for {curr_date}', dp.Table(df_assets),
              dp.Blocks(*figure_list, columns=2))
Esempio n. 29
0
     blocks=[
         dp.HTML(html),
         "The data has been compiled over 3 semesters, for the introductory computer science class CSE 1223.",
         dp.Group(dp.BigNumber(heading="Number of Students",
                               value=num_students),
                  dp.BigNumber(heading="Class Average",
                               value=str(class_average) + "%",
                               change="2%",
                               is_upward_change=True),
                  columns=2),
         dp.BigNumber(heading="Pass Rate", value=str(pass_rate) + "%"),
     ]),
 dp.Page(label='Deliverables',
         blocks=[
             f'### Labs',
             dp.Plot(lab_bar), f'### Homeworks',
             dp.Plot(hw_bar), f'### Projects',
             dp.Plot(project_bar)
         ]),
 dp.Page(label='Exam Grades',
         blocks=[f'### Kernel Density Plot ',
                 dp.Plot(exams_kdp)]),
 dp.Page(label='Final Grades',
         blocks=[
             f'### Comparing Final Grade Data ',
             dp.Plot(fg_hist),
             dp.Plot(letter_pie)
         ]),
 dp.Page(
     dp.Select(blocks=[
         dp.Plot(sex_bar, label='Sex-Bar Chart'),
Esempio n. 30
0
       blocks=["#### Vagas - Engenheiro de Dados", 
       dp.DataTable(df_ED[colunas], label = "Engenheiro de Dados")]
     )

pagina5 = dp.Page(
       title ="Engenheiro de Machine Learning",
       blocks=["#### Vagas - Engenheiro de Machine Learning", 
       dp.DataTable(df_EML[colunas], label = "Engenheiro de Machine Learning")]
     )

"""
r = dp.Report(
    dp.Page(title="Dashes",
            blocks=[
                "#### Heatmap de Vagas pelo Brasil",
                dp.Plot(mapa), "#### Total Vagas",
                dp.Plot(fig3), "#### Total Vagas por Estado",
                dp.Plot(fig1), "#### Total Vagas por Nível",
                dp.Plot(fig2)
            ]),
    dp.Page(title="Cientista de Dados",
            blocks=[
                "#### Vagas - Cientista de Dados",
                dp.DataTable(df_CD[colunas], label="Cientista de Dados")
            ]),
    dp.Page(title="Analista de Dados",
            blocks=[
                "#### Vagas Analista de Dados",
                dp.DataTable(df_AD[colunas], label="Analista de Dados")
            ]),
    dp.Page(title="Engenheiro de Dados",