def test_complex_df_report():
    """Publish a report made of tables for several differently-shaped DataFrames.

    The frames are: a CSV with ``date``, ``datetime`` and ``datetime_tz``
    columns, a plain frame of people and scores, the ``describe()`` summary of
    that frame, and the same summary after ``reset_index()``.
    """
    timestamps_df = convert_csv_pd("""
        date,datetime,datetime_tz
        2017-01-10,2017-01-21T23:10:24,2020-03-23T00:00:00.000Z
        2017-01-11,2017-01-23T23:01:24,2020-04-23T00:00:00.000Z
    """)

    people = {
        "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
        "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
        "age": [42, 52, 36, 24, 73],
        "preTestScore": [4, 24, 31, 2, 3],
        "postTestScore": [25, 94, 57, 62, 70],
    }
    # The explicit column list is exactly the dict's keys, in insertion order.
    people_df = pd.DataFrame(people, columns=list(people))
    summary_df = people_df.describe()
    summary_flat_df = summary_df.reset_index()

    # One Table block per frame, in the order they appear in the report.
    frames = (timestamps_df, people_df, summary_df, summary_flat_df)
    tables = [dp.Table(frame) for frame in frames]

    # `deletable` wraps the report for the duration of the publish call.
    with deletable(dp.Report(*tables)) as report:
        report.publish(name=gen_name())
def gen_report_with_files(datadir: Path, single_file: bool = False) -> dp.Report:
    """Build a sample report from markdown, file, plot and table blocks.

    Args:
        datadir: directory containing ``datapane-logo.png``.
        single_file: when true, return a report holding only the markdown and
            plot blocks wrapped in ``dp.Blocks``; otherwise return a report
            with all six blocks.

    Returns:
        The assembled ``dp.Report``.
    """
    # Asset tests
    numbers = [1, 2, 3]
    frame = gen_df(10000)

    markdown = dp.Markdown(text="# Test markdown block <hello/> \n Test **content**")
    json_file = dp.File(data=numbers, name="List Asset", is_json=True)
    image_file = dp.File(file=datadir / "datapane-logo.png")

    line_chart = alt.Chart(gen_df()).mark_line().encode(x="x", y="y")
    plot = dp.Plot(data=line_chart, caption="Plot Asset")

    # The same frame backs both the plain table and the pivot-enabled table.
    table = dp.Table(df=frame, caption="Test Dataframe Table")
    pivot_table = dp.Table(df=frame, caption="Test Dataframe PivotTable", can_pivot=True)

    if not single_file:
        return dp.Report(json_file, image_file, table, markdown, plot, pivot_table)
    return dp.Report(dp.Blocks([markdown, plot]))
def test_report(tmp_path: Path):
    """Publish a six-block report and check its name, description and source URL."""
    frame = gen_df()
    report_name = gen_name()
    report_description = gen_description()
    repo_url = "https://github.com/datapane/datapane"

    # create a basic report
    markdown = dp.Markdown("hello world!!")

    # Asset tests
    numbers = [1, 2, 3]
    numbers_json: str = json.dumps(numbers)
    chart = alt.Chart(frame).mark_line().encode(x="x", y="y")

    # create the DP
    json_path = tmp_path / "json_list.json"
    json_path.write_text(data=numbers_json)
    from_path = dp.File(file=json_path)
    from_json_str = dp.File(data=numbers_json, is_json=True)
    plot_block = dp.Plot(data=chart)
    from_list = dp.File(data=numbers, is_json=True)
    table_block = dp.Table(df=frame, caption="Our Dataframe")

    report = api.Report(
        markdown, from_path, table_block, from_json_str, plot_block, from_list
    )
    report.publish(
        name=report_name, description=report_description, source_url=repo_url
    )

    with deletable(report):
        # are the fields ok
        check_name(report, report_name)
        assert report.description == report_description
        assert report.source_url == repo_url
        # all six blocks are expected under the first child of the top block
        assert len(report._top_block.blocks[0].blocks) == 6
def test_report(tmp_path: Path):
    """Publish a six-block report and check its headline and block count."""
    source_df = gen_df()
    title = gen_name()
    expected_headline = gen_headline()

    # create a basic report
    greeting = dp.Markdown("hello world!!")

    # Asset tests
    values = [1, 2, 3]
    values_json: str = json.dumps(values)
    line_plot = alt.Chart(source_df).mark_line().encode(x="x", y="y")

    # create the DP
    json_file = tmp_path / "json_list.json"
    json_file.write_text(data=values_json)
    blocks = [
        greeting,
        dp.File(file=json_file),
    ]
    json_block = dp.File(data=values_json, is_json=True)
    plot_block = dp.Plot(data=line_plot)
    list_block = dp.File(data=values, is_json=True)
    table_block = dp.Table(df=source_df, caption="Our Dataframe")
    # report order: markdown, file, table, json string, plot, list
    blocks += [table_block, json_block, plot_block, list_block]

    published = api.Report(*blocks)
    published.publish(name=title, headline=expected_headline)

    with deletable(published):
        # are the fields ok
        assert published.headline == expected_headline
        assert len(published.top_block.blocks) == 6
def LoadReport(title, X, Y, prediccion, anios):
    """
    Utility function that builds the Datapane report.

    Creates a table holding the predicted values together with the metrics
    used to evaluate the models, and a line chart (with point markers) of the
    training, test and predicted series.

    Args:
        title (str): title used for the chart.
        X (:obj: `numpy.array`): real training data.
        Y (:obj: `numpy.array`): real test data for the predicted period.
        prediccion (:obj: `numpy.array`): predicted data.
        anios (int): number of years that were predicted.

    Returns:
        (:obj: `datapane.Report`): datapane report ready to publish or save.
    """
    n_train = len(X)
    n_tail = len(Y) + 1  # test/predicted series also repeat the last training year

    # Year axis: training years start at 1998; both tails start one year
    # before the end of training.
    train_years = list(range(1998, 1998 + n_train))
    tail_start = 1997 + n_train
    tail_years = list(range(tail_start, tail_start + n_tail))
    years = train_years + tail_years + tail_years

    labels = (['Datos de entrenamiento'] * n_train
              + ['Datos de prueba'] * n_tail
              + ['Datos predichos'] * n_tail)

    # Values: the full training series, then the last `anios + 1` points of
    # training+test and of training+prediction.
    observed = list(X)
    window = anios + 1
    students = (observed
                + (observed + list(Y))[-window:]
                + (observed + list(prediccion))[-window:])

    df = pd.DataFrame(data={
        'Serie': labels,
        'Año': years,
        'Alumnos': students
    })

    # One-row metrics table: one column per predicted year plus the metrics.
    header = [f'Año {year}' for year in range(1, anios + 1)]
    header.extend(['MAPE', 'MAE', 'RMSE'])

    error = prediccion - Y
    scores = np.array(
        [
            np.abs(error / Y).mean(),          # MAPE
            np.abs(error).mean(),              # MAE
            np.sqrt(np.square(error).mean()),  # RMSE
        ],
        dtype=np.float64,
    )
    row = list(prediccion) + list(scores)
    metrics_table = pd.DataFrame(np.array([row]), columns=header)

    # Build the chart
    lines = alt.Chart(df).mark_line()
    encoded = lines.encode(x='Año', y='Alumnos', color='Serie')
    chart = encoded.mark_line(point=True).interactive().properties(title=title)

    # Build the report
    return dp.Report(dp.Table(metrics_table), dp.Plot(chart))
def gen_report_complex_with_files(datadir: Path,
                                  single_file: bool = False,
                                  local_report: bool = False) -> dp.Report:
    """Build a sample report exercising text, asset and table blocks.

    Args:
        datadir: directory containing ``datapane-logo.png``.
        single_file: when true, return a single-group report with only the
            markdown block and the data table; otherwise a two-page report.
        local_report: when true, the basic table is reused in place of a
            ``dp.DataTable``.

    Returns:
        The assembled ``dp.Report``.
    """
    # Asset tests
    numbers = [1, 2, 3]
    small_frame = gen_df()
    big_frame = gen_df(10000)

    # text
    # NOTE(review): `md_block` and `h2` are not defined in this function;
    # presumably they come from module scope - confirm.
    html_plain = dp.HTML(html="<h1>Hello World</h1>")
    html_built = dp.HTML(html=h2("Hello World"))
    code = dp.Code(code="print('hello')", language="python")
    formula = dp.Formula(formula=r"\frac{1}{\sqrt{x^2 + 1}}")
    tests_number = dp.BigNumber(heading="Tests written", value=1234)
    real_tests_number = dp.BigNumber(heading="Real Tests written :)",
                                     value=11,
                                     change=2,
                                     is_upward_change=True)
    embed = dp.Embed(url="https://www.youtube.com/watch?v=JDe14ulcfLA")

    # assets
    plot = dp.Plot(data=gen_plot(), caption="Plot Asset")
    json_file = dp.File(data=numbers, filename="List Asset", is_json=True)
    image_file = dp.File(file=datadir / "datapane-logo.png")

    # tables
    basic_table = dp.Table(data=small_frame, caption="Test Basic Table")
    # local reports don't support DataTable
    if local_report:
        data_table = basic_table
    else:
        data_table = dp.DataTable(df=big_frame, caption="Test DataTable")

    if single_file:
        return dp.Report(dp.Group(blocks=[md_block, data_table]))

    text_page = dp.Page(
        dp.Select(md_block,
                  html_plain,
                  html_built,
                  code,
                  formula,
                  embed,
                  type=dp.SelectType.TABS),
        dp.Group(tests_number, real_tests_number, columns=2),
    )
    asset_page = dp.Page(plot, json_file, image_file, basic_table, data_table)
    return dp.Report(text_page, asset_page)
def discrim_html_output_datapane(self, ProcDiscrim, fileName):
    """Create an HTML report for the PROC DISCRIM method with the datapane
    library.

    Parameters
    ----------
    ProcDiscrim : LinearDiscriminantAnalysis object
        object obtained after calling the fit() function of the
        LinearDiscriminantAnalysis class
    fileName : string
        name of the output file (with or without .html)
    """
    # Make sure the output path carries the .html extension.
    output_path = fileName if fileName.endswith(".html") else fileName + ".html"

    # Run the statistics helpers, in this order, before the tables are read.
    for step in ("_stats_dataset", "_stats_classes",
                 "_stats_pooled_cov_matrix", "_stats_wilks"):
        getattr(ProcDiscrim, step)()

    sections = [
        dp.Text("# Linear Discriminant Analysis"),
        dp.Text("## General information about the data"),
        dp.Table(ProcDiscrim.infoDataset),
        dp.Table(ProcDiscrim.infoClasses),
        dp.Text("## Informations on the covariance matrix"),
        dp.Table(ProcDiscrim.W),
        dp.Table(ProcDiscrim.infoCovMatrix),
        dp.Text("## Function of lda and its' intercept "
                "and coefficients"),
        dp.Table(ProcDiscrim.infoFuncClassement),
        dp.Text("## Statistics. Wilks' Lambda"),
        dp.Table(ProcDiscrim.infoWilksStats),
    ]
    dp.Report(*sections).save(path=output_path)
def gen_report_with_files(datadir: Path, single_file: bool = False) -> dp.Report:
    """Build a sample report from text, number, asset and table blocks.

    Args:
        datadir: directory containing ``datapane-logo.png``.
        single_file: when true, return a report holding only the markdown and
            plot blocks wrapped in ``dp.Blocks``; otherwise return a report
            with all ten blocks.

    Returns:
        The assembled ``dp.Report``.
    """
    # Asset tests
    numbers = [1, 2, 3]
    small_frame = gen_df()
    big_frame = gen_df(10000)

    # text
    markdown = dp.Markdown(text="# Test markdown block </hello> \n Test **content**")
    html = dp.HTML(html="Hello World</hello>")
    tests_number = dp.BigNumber(heading="Tests written", value=1234)
    real_tests_number = dp.BigNumber(
        heading="Real Tests written :)", value=11, change=2, is_upward_change=True
    )

    # assets
    line_chart = alt.Chart(gen_df()).mark_line().encode(x="x", y="y")
    plot = dp.Plot(data=line_chart, caption="Plot Asset")
    json_file = dp.File(data=numbers, name="List Asset", is_json=True)
    image_file = dp.File(file=datadir / "datapane-logo.png")

    # tables
    basic_table = dp.Table(data=small_frame, caption="Test Basic Table")
    data_table = dp.DataTable(df=big_frame, caption="Test DataTable")
    pivot_table = dp.DataTable(
        df=big_frame, caption="Test DataTable with Pivot", can_pivot=True
    )

    if not single_file:
        all_blocks = (
            markdown,
            html,
            tests_number,
            real_tests_number,
            plot,
            json_file,
            image_file,
            basic_table,
            data_table,
            pivot_table,
        )
        return dp.Report(*all_blocks)
    return dp.Report(dp.Blocks(blocks=[markdown, plot]))
"""{{ name }} script"""
import pandas as pd
import datapane as dp

# TODO - enter your code here...
# Placeholder data: a small two-column frame to show in the report.
df = pd.DataFrame.from_dict({"x": [4, 3, 2, 1], "y": [10.5, 20.5, 30.5, 40.5]})

# Create your datapane report components
report = dp.Report(
    # Plain string: the text has no placeholders, so no f-prefix is needed.
    dp.Markdown("#### **Sample** Markdown block"),
    dp.Table(df),
)
# Publish the report under a fixed name.
report.publish(name="my_report")
import pandas as pd
import datapane as dp

# basic report creation, with params
# The markdown text interpolates the "p1" entry of dp.Params.
df = pd.DataFrame.from_dict({"x": [4, 3, 2, 1], "y": [10.5, 20.5, 30.5, 40.5]})
blocks = [dp.Markdown(f"Dummy Markdown block - {dp.Params['p1']}"), dp.Table(df)]

# test running as main or by datapane runner
if dp.on_datapane:
    print("on datapane")
if __name__ == "__datapane__":  # same as dp.by_datapane
    print("by datapane")

# NOTE(review): the flattened source does not show whether the two lines below
# sit inside the `__datapane__` guard above - confirm against the original.
report = dp.Report(blocks=blocks)
report.publish(name="dp_report", headline="My Report")
"""{{ name }} script"""
import pandas as pd
import datapane as dp

# TODO - enter your code here...
# Placeholder data: a small two-column frame to show in the report.
df = pd.DataFrame.from_dict({"x": [4, 3, 2, 1], "y": [10.5, 20.5, 30.5, 40.5]})

# Create your datapane report components
# The markdown text has no placeholders, so it is a plain string, not an f-string.
report = dp.Report(dp.Markdown("#### **Sample** Markdown block"), dp.Table(df))
# Publish the report under a fixed name.
report.publish(name="my_report")
) # In-line JSON list_asset = dp.File(data=lis, is_json=True) # Downloadable file file_asset = dp.File(data=lis) # In-line image img_asset = dp.File(file=Path("./datapane-logo.png")) # Vega vega_asset = dp.Plot(data=gen_plot()) # Table df_table_asset = dp.Table(gen_df()) df_datatable_asset = dp.DataTable(gen_df(10000)) # Matplotlib np.random.seed(19680801) xdata = np.random.random([2, 10]) xdata1 = xdata[0, :] xdata2 = xdata[1, :] xdata1.sort() xdata2.sort() ydata1 = xdata1**2 ydata2 = 1 - xdata2**3 mpl_fig = plt.figure(figsize=(15, 15)) ax = mpl_fig.add_subplot(1, 1, 1) ax.plot(xdata1, ydata1, color='tab:blue') ax.plot(xdata2, ydata2, color='tab:orange')
"""{{ name }} report"""
import altair as alt
import pandas as pd
import datapane as dp

# get the data
dataset = pd.read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")

# mean of the smoothed new-cases-per-million column for each (continent, date)
grouped = dataset.groupby(["continent", "date"])
df = grouped["new_cases_smoothed_per_million"].mean().reset_index()

# build an altair plot: one semi-transparent, unstacked area per continent
area = alt.Chart(df).mark_area(opacity=0.4, stroke='black')
encoded = area.encode(
    x='date:T',
    y=alt.Y('new_cases_smoothed_per_million:Q', stack=None),
    color=alt.Color('continent:N', scale=alt.Scale(scheme='set1')),
    tooltip='continent:N',
)
plot = encoded.interactive().properties(width='container')

# embed data and plot into a Datapane report and publish
report = dp.Report(
    "## Covid data per continent",
    dp.Plot(plot),
    dp.Table(df),
)
report.publish(
    name="Covid Demo {{ name }}",
    description="Plot of Covid infections per continent, using data from ourworldindata",
    open=True,
)
import pandas as pd
import altair as alt
import datapane as dp

# Download the Our World in Data covid dataset.
dataset = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv')

# Average the smoothed new-cases-per-million column per continent and date.
by_continent_date = dataset.groupby(['continent', 'date'])
daily_mean = by_continent_date['new_cases_smoothed_per_million'].mean()
df = daily_mean.reset_index()

# Interactive, container-wide area chart with one unstacked area per continent.
y_axis = alt.Y('new_cases_smoothed_per_million:Q', stack=None)
continent_color = alt.Color('continent:N', scale=alt.Scale(scheme='set1'))
plot = (
    alt.Chart(df)
    .mark_area(opacity=0.4, stroke='black')
    .encode(x='date:T', y=y_axis, color=continent_color, tooltip='continent:N')
    .interactive()
    .properties(width='container')
)

# Publish the chart and the aggregated table as one report.
blocks = [dp.Plot(plot), dp.Table(df)]
dp.Report(*blocks).publish(name='covid_report', open=True)
+ [に、へ、で](#-----) * [Common Heuristics](#common-heuristics-4) * [Insights](#insights-4) + [ね、よ、わ、さ、な、ん](#-----------) * [Common Heuristics](#common-heuristics-5) * [Insights](#insights-5) """), dp.Markdown(""" ## Dataset The corpora used for the current project can be found [here](https://www.kaggle.com/bryanpark/japanese-single-speaker-speech-dataset), [here](https://www.kaggle.com/alvations/tatoeba), and [here](https://www.kaggle.com/nltkdata/knb-corpus). They've been processed via the [Ginza](https://github.com/megagonlabs/ginza) library, which is based on [SudachiPy](https://github.com/WorksApplications/SudachiPy) and [spaCy](https://spacy.io/). These corpora represent a mix of transcribed speech, translated example sentences, and blog articles. ### Full NLP for All Particles >*Linguistic attributes for all tokens tagged as any sort of particle* """), dp.Table(particle_df), dp.Markdown(""" ### Relative Frequency for All Particles >*Frequency table for all particles, including counts, percentages, and cumulative statistics* """), dp.Table(particle_stb), dp.Markdown(""" --- ## Particle Comparisons >*Here, we can look at the contexts in which different particles appear most frequently, and see how these contexts compare to conventional wisdom/rules about how the particles are used* ### は & が #### Common Heuristics + One common way to differentiate は and が is that は marks the *topic* of a sentence, where が marks the grammatical *subject* in a sentence. + In this sense, は can lend more emphasis to the subject it marks as compared to が + が tends to be used more in noun and subordinate clauses, and if は is used in these contexts, it tends to be for emphasis
rows += [{ 'ticker': ETHC_ticker, 'currency': currency.symbol, 'nav': nav, 'nav_per_share': nav_per_share, 'share_price': price, 'shares_outstanding': shares_outstanding, 'premium': (price / nav_per_share - 1) * 100 }] df = pd.DataFrame(rows) r = dp.Report( f'# Ether Capital Corp. NAV', dp.Text(f'Updated {now}'), f'### Holdings', dp.Table(pd.DataFrame(current_holdings).iloc[1:]), f'### Share Price Premium', dp.Table(df), f'The maximum discount is {-df["premium"].min():.1f} %' \ if df["premium"].mean() < 0 else f'The maximum premium is {df["premium"].max():+.1f} %' ) r.save( path='report.html', name=f'Ether Capital Corp. NAV', open=False )
'https://covid.ourworldindata.org/data/vaccinations/vaccinations-by-manufacturer.csv', parse_dates=['date']) df = df.groupby(['vaccine', 'date'])['total_vaccinations'].sum().tail(1000).reset_index() # plot vaccinations over time using Altair plot = alt.Chart(df).mark_area(opacity=0.4, stroke='black').encode( x='date:T', y=alt.Y('total_vaccinations:Q'), color=alt.Color('vaccine:N', scale=alt.Scale(scheme='set1')), tooltip='vaccine:N').interactive().properties(width='container') # tablulate total vaccinations by manufacturer total_df = df[df["date"] == df["date"].max()].sort_values( "total_vaccinations", ascending=False).reset_index(drop=True) total_styled = total_df.style.bar(subset=["total_vaccinations"], color='#5fba7d', vmax=total_df["total_vaccinations"].sum()) # embed into a Datapane Report report = dp.Report( "## Vaccination Report", dp.Plot(plot, caption="Vaccinations by manufacturer over time"), dp.Table(total_styled, caption="Current vaccination totals by manufacturer"), dp.Table(df, caption="Initial Dataset")) report.upload( name='Covid Vaccinations Demo {{ name }}', description="Covid Vaccinations report, using data from ourworldindata", open=True)
"NTAR.CN", "IDK.CN", "ART.V", "PKK.CN", "APHA.TO", "CMC.CN", "AMPD.CN", "MTRX.V" ] curr_date = datetime.today().strftime('%Y-%m-%d') start_date = (datetime.today() - timedelta(days=59)).strftime('%Y-%m-%d') df_assets = generate_up(stock_list, start_date, curr_date) figure_list = [ dp.Plot(intraday_plot(stock, start_date, curr_date)) for stock in stock_list ] publish_report = False dp_token = os.getenv('DP_TOKEN') if dp_token: # login try: publish_report = True except Exception as e: print(e) # login r = dp.Report(f'### Intraday Report for {curr_date}', dp.Table(df_assets), dp.Blocks(*figure_list, columns=2)) r.save(path='index.html', open=True) if publish_report == True: r.publish(name='Daily Report', open=False, tweet=False)
md_block = dp.Markdown(text="# Test markdown block \n Test **content**") # In-line JSON list_asset = dp.File(data=lis, is_json=True) # Downloadable file file_asset = dp.File(data=lis) # In-line image img_asset = dp.File(file=Path("./datapane-logo.png")) # Vega vega_asset = dp.Plot(data=alt.Chart(gen_df()).mark_line().encode(x="x", y="y")) # Table df_asset = dp.Table(df, can_pivot=False) # Pivot table pv_asset = dp.Table(gen_df(10), can_pivot=True) # Matplotlib np.random.seed(19680801) xdata = np.random.random([2, 10]) xdata1 = xdata[0, :] xdata2 = xdata[1, :] xdata1.sort() xdata2.sort() ydata1 = xdata1**2 ydata2 = 1 - xdata2**3 mpl_fig = plt.figure(figsize=(15, 15)) ax = mpl_fig.add_subplot(1, 1, 1)
'''
DATAPANE INTERACTIVE REPORT

Install first:  pip install datapane
Remember to login before publishing.

Links:
    https://datapane.com/reports/?name=&owned_by_me=on&order=
    https://towardsdatascience.com/introduction-to-datapane-a-python-library-to-build-interactive-reports-4593fd3cb9c8
'''
import pandas as pd
import altair as alt
import datapane as dp

# Monthly GOOG price history served as CSV by Yahoo Finance.
df = pd.read_csv(
    'https://query1.finance.yahoo.com/v7/finance/download/GOOG?period2=1585222905&interval=1mo&events=history'
)

# Interactive line chart of the opening price over time.
encoded = alt.Chart(df).encode(x='Date:T', y='Open')
chart = encoded.mark_line().interactive()

# Once the dataframe and the chart exist, assemble the report blocks.
description_block = dp.Markdown('My simple report')  # add description to the report
table_block = dp.Table(df)  # create a table
plot_block = dp.Plot(chart)  # create a chart
r = dp.Report(description_block, table_block, plot_block)

# Publish your report. Make sure to have visibility='PUBLIC' if you want to share your report
r.publish(name='stock_report', visibility='PUBLIC')
# Notebook cell converted to a comment: a bare `pip install ...` line is not
# valid Python and would stop this file from being parsed. Run it from a
# shell instead:
#     pip install datapane


# In[2]:


import pandas as pd
import altair as alt
import datapane as dp


# In[3]:


# Monthly GOOG price history served as CSV by Yahoo Finance.
df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/GOOG?period2=1585222905&interval=1mo&events=history')

# Interactive line chart of the opening price over time.
chart = alt.Chart(df).encode(
    x='Date:T',
    y='Open'
).mark_line().interactive()

# Save the table and chart as a local HTML report and open it.
r = dp.Report(dp.Table(df), dp.Plot(chart))
r.save(path='report.html', open=True)


# In[ ]:
"Media - Diversified", "Industrial Distribution", "Agriculture", "Beverages - Non-Alcoholic", "Medical Devices & Instruments", "Diversified Financial Services", "Furnishings, Fixtures & Appliances", "Steel", "Packaging & Containers", "Semiconductors", "Waste Management", "Healthcare Plans", ] def industry_to_md(industry_list): return '\n'.join([f'* {i}' for i in industry_list]) pattern = '(?i)Waste Management|(?i)Interactive Media|(?i)Telecommunication Services|(?i)Software|(?i)Hardware' desired_df = stock_df[stock_df['industry'].str.contains(pattern, na=False)] industry_list = industry_to_md(industries) curr_date = datetime.today().strftime('%Y-%m-%d') r = dp.Report(f'### Desired Stocks for {curr_date}', dp.Table(desired_df), f'### Industry List', f'{industry_list}') r.save(path='industry_index.html') r.publish(name='Desired Tickers', open=False, tweet=False)