Python pivot_ui Exemples, pivottablejs.pivot_ui Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : TrialStatistics.py Projet : elhamod/HGNN

 def showStatistics(self, aggregated=True, saveHTML=False):
     """Display the statistics table and optionally export it as a pivot page.

     Args:
         aggregated: when true, show ``self.agg_df`` (calling
             ``self.aggregateTrials()`` first if it is empty); otherwise
             show ``self.df``.
         saveHTML: when true, also write an interactive pivot table to
             ``<self.experiment_name>/<name>.html``.
     """
     table = self.df.copy()
     if aggregated:
         if self.agg_df.empty:
             self.aggregateTrials()
         table = self.agg_df.copy()
         label = "aggregated statistics"
     else:
         label = "raw statistics"

     display(HTML(table.to_html()))
     if not saveHTML:
         return
     html_path = os.path.join(self.experiment_name, label + '.html')
     pivot_ui(table, outfile_path=html_path)

Exemple #2

0

Afficher le fichier

Fichier : TrialStatistics.py Projet : elhamod/myhelpers

    def showStatistics(self, aggregated=True, saveHTML=False):
        """Show raw or aggregated trial statistics as an HTML table.

        With ``aggregated`` set, ``self.agg_df`` is used and is built on
        demand through ``self.aggregateTrials()`` when it is empty;
        otherwise ``self.df`` is used. With ``saveHTML`` set, the same table
        is also exported as a pivottablejs page inside
        ``self.experiment_name``.
        """
        frame = self.df.copy()
        if aggregated:
            if self.agg_df.empty:
                self.aggregateTrials()
            frame = self.agg_df.copy()

        title = "raw statistics"
        if aggregated:
            title = "aggregated statistics"
        file_name = title + '.html'
        # A qgrid-based view (qgrid.show_grid after flattening the column
        # names, see https://github.com/quantopian/qgrid/issues/18#issuecomment-149321165)
        # is disabled in favour of the plain HTML rendering below.
        display(HTML(frame.to_html()))
        if saveHTML:
            target = os.path.join(self.experiment_name, file_name)
            pivot_ui(frame, outfile_path=target)

Exemple #3

0

Afficher le fichier

    def showDF(self, df, how='print'):
        """Show *df* in the requested form, then print its shape.

        Args:
            df: the DataFrame to show.
            how: ``'print'`` for a plain string dump, ``'tabulate'`` for a
                bordered table, ``'pivot'`` to write a pivot page and open
                it in the browser. Any other value prints an error message.
        """
        if how in ('print', 'tabulate'):
            if how == 'print':
                # plain pretty-printed string
                rendered = df.to_string()
            else:
                # string with borders
                rendered = tabulate(df, headers='keys', tablefmt='psql')
            print(rendered)
        elif how == 'pivot':
            # NOTE(review): the page is written to 'x.html' relative to the
            # working directory but opened from SPATH + 'x.html' — confirm
            # both refer to the same file.
            pivot_ui(df, outfile_path='x.html')
            webbrowser.open(SPATH + 'x.html', new=2)
        else:
            print('invalid "how" in showDF()')

        print(df.shape)

Exemple #4

0

Afficher le fichier

def _pivot_ui(df, totals=True, row_totals=True, **options):
    """Render an interactive pivot table for *df* with pivottablejs.

    Example options::

        rows=['x', 'y'],
        cols=['z', 'v'],
        vals=['percentile/99th'],
        aggregatorName='First',
        rendererName='Heatmap'

    Args:
        df: data passed straight to ``pivottablejs.pivot_ui``.
        totals: when false, a CSS rule hiding ``.pvtTotal``,
            ``.pvtTotalLabel`` and ``.pvtGrandTotal`` is injected into the
            generated page.
        row_totals: when false, a CSS rule hiding ``.rowTotal``,
            ``.pvtRowTotalLabel`` and ``.pvtGrandTotal`` is injected.
        **options: forwarded unchanged to ``pivottablejs.pivot_ui``.

    Returns:
        Whatever ``pivottablejs.pivot_ui`` returns, or ``None`` when
        pivottablejs cannot be imported.
    """
    try:
        from pivottablejs import pivot_ui
    except ImportError:
        log.warning("Error: cannot import pivottablejs, Pivottable will not be generated'!")
        return
    iframe = pivot_ui(df, **options)

    hidden_rules = []
    if not totals:
        hidden_rules.append(
            '.pvtTotal, .pvtTotalLabel, .pvtGrandTotal {display: none}')
    if not row_totals:
        hidden_rules.append(
            '.rowTotal, .pvtRowTotalLabel, .pvtGrandTotal {display: none}')

    if hidden_rules:
        # pivottablejs writes the page as UTF-8; reopening it with the
        # platform default encoding can fail or corrupt non-ASCII data.
        # NOTE(review): iframe.src is assumed to be a local file path (it
        # would not be if a ``url`` option was passed) — confirm.
        with open(iframe.src, encoding="utf-8") as f:
            page = f.read()
        # Each rule is placed in front of the closing </style> tag, applied
        # in sequence so the result matches two separate rewrites.
        for rule in hidden_rules:
            page = page.replace('</style>', rule + '</style>')
        with open(iframe.src, "w", encoding="utf-8") as f:
            f.write(page)
    return iframe

Exemple #5

0

Afficher le fichier

def pivot_ui(df, **kwargs):
    """Call ``pivottablejs.pivot_ui`` on *df* with CRLF-free CSV export.

    *df* is wrapped in a DataFrame subclass whose ``to_csv`` result has
    every carriage-return/line-feed pair replaced by a single line feed;
    ``kwargs`` are forwarded unchanged and the library's return value is
    passed back.
    """
    import pivottablejs

    class _DataFrame(pd.DataFrame):
        def to_csv(self, **csv_options):
            csv_text = super().to_csv(**csv_options)
            return csv_text.replace("\r\n", "\n")

    wrapped = _DataFrame(df)
    return pivottablejs.pivot_ui(wrapped, **kwargs)

Exemple #6

0

Afficher le fichier

Fichier : swan.py Projet : iwankgb/swan

 def pivot_ui(self):
     """Return an interactive pivot table for ``self.df``.

     Prints an installation hint and returns ``None`` when pivottablejs
     cannot be imported.
     """
     try:
         from pivottablejs import pivot_ui as render_pivot
     except ImportError:
         hint = "Error: cannot import pivottablejs, please install 'pip install pivottablejs'!"
         print(hint)
         return
     return render_pivot(self.df)

Exemple #7

0

Afficher le fichier

def custom_filter_plots(input_csv_file, output_directory, custom_pivot_table):
    """Analyze parsed simulation results, interactively or with custom plots.

    This is the place to edit/customize the analysis functions. When
    ``custom_pivot_table`` is true, an interactive (drag and drop) pivot
    table is generated and opened in Firefox; otherwise the custom analysis
    functions run and the files found in ``output_directory`` are listed.

    Args:
        input_csv_file: CSV file with all results of the simulation campaign.
        output_directory: folder handed to ``new_folder`` and to the
            analysis functions.
        custom_pivot_table: selects the interactive pivot table instead of
            the custom plots.
    """
    new_folder(output_directory)
    df = pd.read_csv(input_csv_file)

    if custom_pivot_table:
        pj.pivot_ui(df)
        os.system('firefox pivottablejs.html')
        return

    # Rename columns for axis plotting # TO DO
    df = df.rename(columns={evaluate: evaluate})

    # Example of swapping two column names (disabled):
    #df = df.rename(columns={'accidentDuration': 'Beacon_interval(s)', 'beaconInterval': 'Accident_duration'})

    # Custom analysis functions; node_speed, packet_losses_new and
    # node_counter are further examples that are currently disabled.
    rx_time_distribution(df, output_directory)
    distance(df, output_directory)

    # Print outputs
    print('\nFiles generated: ')
    for index, filename in enumerate(os.listdir(output_directory)):
        print(' {}) {}'.format(index, filename))

Exemple #8

0

Afficher le fichier

def display_dataframe_with_pivotablejs(obj: Any):
    """Write *obj* as a pivottablejs page in a temp file and open it.

    Every column of *obj* is used as a pivot row. The page is written to a
    temporary ``sho_*.html`` file that is not deleted afterwards, then
    opened in the default web browser.

    :param obj: table object to display (must expose ``columns``).
    """
    # Only the file name is needed: close the handle immediately so it is
    # not leaked and so pivot_ui can reopen the path for writing (an open
    # NamedTemporaryFile cannot be opened a second time on Windows).
    with tempfile.NamedTemporaryFile(prefix="sho_",
                                     suffix=".html",
                                     delete=False) as tf:
        file_path = tf.name
    cols = list(obj.columns.values)
    print(f"File Name : {file_path}")
    pivot_ui(obj, outfile_path=file_path, rows=cols)
    file_url = 'file://' + os.path.realpath(file_path)
    try:
        browser = webbrowser.get()
        browser.open(file_url)
    except Exception:
        # Best effort: fall back to the module-level opener.
        logger.info("Couldn't find chrome !")
        webbrowser.open(file_url)
    # NOTE(review): presumably gives the browser time to load the page
    # before the caller continues — confirm the delay is still needed.
    time.sleep(5)
    logger.info(f"File Name : {file_path}")

Exemple #9

0

Afficher le fichier

def df_meta_ui(df):
    """Build a two-pane widget summarising *df*.

    The left pane receives ``df.info()``, ``df.describe()`` and
    ``df.head()``; the right pane (full width) receives the pivot table.
    Returns the ``HBox`` holding both panes.
    """
    container = ipyw.HBox()
    left_pane = ipyw.Output()
    right_pane = ipyw.Output(layout=ipyw.Layout(width="100%"))
    summary = df.describe()
    container.children = [left_pane, right_pane]

    with left_pane:
        df.info()
        display(summary)
        display(df.head())

    with right_pane:
        pivot_widget = pivot_ui(df)
        display(pivot_widget)

    return container

Exemple #10

0

Afficher le fichier

def _pivot_ui(df, totals=True, **options):
    """ Interactive pivot table for data analysis. """
    try:
        from pivottablejs import pivot_ui
    except ImportError:
        print("Error: cannot import pivottablejs, please install 'pip install pivottablejs'!")
        return
    iframe = pivot_ui(df, **options)
    if not totals:
        with open(iframe.src) as f:
            replacedHtml = f.read().replace(
                '</style>',
                '.pvtTotal, .pvtTotalLabel, .pvtGrandTotal {display: none}</style>'
            )
        with open(iframe.src, "w") as f:
            f.write(replacedHtml)
    return iframe

Exemple #11

0

Afficher le fichier

# In-notebook GUI for pivoting the data (good for exploring categorical data).
# NOTE(review): `data` is not defined in this excerpt — presumably a pandas
# DataFrame created in an earlier cell; confirm.
from pivottablejs import pivot_ui

pivot_ui(data)

Exemple #12

0

Afficher le fichier

Fichier : csv2pivot_html.py Projet : riron1206/data_analysis

列数行数大きすぎると重くて使えないが。。。（300列あるとなんもでなくなる）

Usage:
    $ conda activate tfgpu
    $ python csv2pivot_html.py -o ./ -i ./train.csv
"""
import os
import pathlib
import argparse
import pandas as pd
from pivottablejs import pivot_ui
from IPython.display import HTML

if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-o", "--output_dir", type=str, default="./", help="output dir path.",
    )
    # The input path is mandatory: without it pd.read_csv(None) fails with
    # an unrelated-looking error, so let argparse reject the call up front.
    ap.add_argument(
        "-i", "--input_csv", type=str, required=True, help="input csv path.",
    )
    args = vars(ap.parse_args())

    os.makedirs(args["output_dir"], exist_ok=True)
    df = pd.read_csv(args["input_csv"])
    # <output_dir>/<input file stem>.html, joined with pathlib rather than
    # by string concatenation.
    outfile_path = str(
        pathlib.Path(args["output_dir"])
        / f'{pathlib.Path(args["input_csv"]).stem}.html'
    )

    # Write the pivot table HTML
    pivot_ui(df, outfile_path=outfile_path)
    # NOTE(review): the value of this expression is discarded when the file
    # runs as a script — confirm whether it is still needed.
    HTML(outfile_path)

Exemple #13

0

Afficher le fichier

Fichier : EDA6.py Projet : niangu/Python_machine_learning

    ts[variable] = ts[variable].notnull() * 1
    map = folium.Map(location=[48, -102], zoom_start=2)
    map.choropleth(geo_data=geo,
                   data=ts,
                   columns=['country', variable],
                   key_on='feature.properties.name',
                   reset=True,
                   fill_color='GnBu',
                   fill_opacity=1,
                   line_opacity=0.2,
                   legend_name=legend_name if legend_name else variable)
    return map


plot_null_map(data, '2013-2017', 'number_undernourished',
              'Number undernourished is missing')

#Over time
fig, ax = plt.subplots(figsize=(16, 16))
sns.heatmap(data.groupby(['time_period',
                          'variable']).value.count().unstack().T,
            ax=ax)
plt.xticks(rotation=45)
plt.xlabel('Time period')
plt.ylabel('Variable')
plt.title('Number of countries with data reported for each variable over time')
plt.show()

pivottablejs.pivot_ui(time_slice(data, '2013-2017'), )
pandas_profiling.ProfileReport(time_slice(data, '2013-2017'))

Exemple #14

0

Afficher le fichier

Fichier : 01-Tips-and-tricks.py Projet : jbwhit/svds-jupyter


# In[5]:

df = pd.read_csv("../data/mps.csv")


# In[6]:

df.head()


# In[7]:

from pivottablejs import pivot_ui
pivot_ui(df)
# Province, Party, Average, Age, Heatmap


# # Keyboard shortcuts

# In[8]:

# in select mode, shift j/k (to select multiple cells at once)
# split cell with ctrl shift -


# In[9]:

first = 1

Exemple #15

0

Afficher le fichier

        financials[j]['Current Ratio'] = Ratios[j + p]['currentRatio']
        financials[j]['Cash Conversion Cycle'] = Ratios[
            j + p]['cashConversionCycle']

        # Price Ratios
        financials[j]['Mkt Cap'] = key_Metrics[j + p]['marketCap'] / millions
        financials[j]['PE'] = Ratios[j + p]['priceEarningsRatio']
        financials[j]['PS'] = Ratios[j + p]['priceToSalesRatio']
        financials[j]['PB'] = Ratios[j + p]['priceToBookRatio']
        financials[j]['Price To FCF'] = Ratios[j +
                                               p]['priceToFreeCashFlowsRatio']
        financials[j]['PEG'] = Ratios[j + p]['priceEarningsToGrowthRatio']
        financials[j]['Revenue per Share'] = key_Metrics[j +
                                                         p]['revenuePerShare']
        financials[j]['EPS'] = IS[j + p]['eps']

    # Transform the dictionary into a Pandas
    fundamentals_single = pd.DataFrame.from_dict(financials, orient='index')

    # Add a new column that indicates the stock
    stock_identity = [company[i]] * len(fundamentals_single.index)
    fundamentals_single.insert(0, "Stock", stock_identity, True)

    # Concatenate the 2 dataframes together
    fundamentals_total = pd.concat([fundamentals_total, fundamentals_single])

# Export to Excel or directly to pivot table
pivot_ui(fundamentals_total)
with pd.ExcelWriter('fundamentals.xlsx', mode='a') as writer:
    fundamentals_total.to_excel(writer, sheet_name='consolidated_quarter')

Exemple #16

0

Afficher le fichier

c=3

# %%
b=5

# %%
e=8

# %%
df

# %%
from pivottablejs import pivot_ui



# %%

# String literals need straight quotes; the typographic quotes previously
# used here are a syntax error.
# NOTE(review): HTML is not imported in the visible cells — confirm it is
# imported elsewhere (e.g. from IPython.display).
pivot_ui(df, outfile_path='pivottablejs.html')
HTML('pivottablejs.html')

# %%
import qgrid

# %%
# Pass the DataFrame itself (a dangling "df." is a syntax error).
qgrid.show_grid(df)

# %%

# %%

Exemple #17

0

Afficher le fichier

Fichier : socioLinguisticApp.py Projet : CoEDLSydneySpeaks/SummerScholar2017

def view_pivotTable(plot_data: plot_data):
    """Display a pivot table for the module-level object named *plot_data*.

    *plot_data* is used as a key into ``globals()``; the object found there
    is handed to ``pivot_ui`` and the result is displayed.
    """
    display(pivot_ui(globals()[plot_data]))

Exemple #18

0

Afficher le fichier

    for i in json_device_list['devices']:
        device_dict['sysName'] = i['sysName']
        device_dict['ip'] = i['ip']
        device_dict['sysDescr'] = i['sysDescr']
        device_dict['os'] = i['os']
        device_dict['version'] = i['version']
        device_dict['location'] = i['location']
        device_dict['hardware'] = i['hardware']
        device_dict['hostname'] = i['hostname']
        device_dict['features'] = i['features']
        device_dict['serial'] = i['serial']
        device_dict['nms'] = "https://" + NMS_DEVICE_URL + str(i['device_id'])
        device_dict['rancid'] = "http://" + RANCID + i['hostname']
        devices_list.append(dict(device_dict))

# Dump the collected devices to CSV. newline='' lets the csv module control
# line endings (otherwise blank rows appear on Windows); UTF-8 matches the
# default encoding pandas.read_csv uses to read the file back.
with open(CSV_FILE, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, delimiter=',', fieldnames=devices_list[0].keys())
    writer.writeheader()
    writer.writerows(devices_list)

df = pandas.read_csv(CSV_FILE)
pivot_ui(df, outfile_path=HTML_FILE, rows=['sysName', 'hostname', 'ip', 'hardware', 'os', 'version', 'features', 'serial', 'location', 'nms', 'rancid'])

# Stamp the generated page with the edit time. The page is read completely
# and closed before it is rewritten; pivottablejs writes it as UTF-8, so the
# same encoding is used for both passes.
with open(HTML_FILE, encoding='utf-8') as myfile:
    txt = myfile.read()
soup = bs4.BeautifulSoup(txt, features="html.parser")

soup.head.append("Last Edit: " + DATE.strftime("%Y-%m-%d %H:%M"))
with open(HTML_FILE, "w", encoding='utf-8') as outfile:
    outfile.write(str(soup))

Exemple #19

0

Afficher le fichier

def olap_configuration():
    """Build the pivot ("cube") page from the dimensions and measures chosen.

    Reads the chosen time, node and spatial dimensions and the measures
    from the GUI widgets, builds the joined DataFrame, keeps only the
    chosen columns and opens a pivottablejs page (``cube.html``) in the
    browser. When no dimension is chosen an error label is shown instead.
    Finally the geospatial filter state (``liste``, ``i``, ``text`` and the
    ``label`` widget) is reset.
    """
    global liste
    global i
    global label
    global text
    # Fetch the chosen dimensions
    my_dimension_temps = choose_dimension_temps.get()
    my_dimension_node = choose_dimension_node.get()
    my_dimension_spatiale = choose_dimension_spatiale.get()
    # Collect the measures into one string of the form "measure1,measure2..."
    # NOTE(review): this reads listbox items 0..n-1 (n = number of selected
    # entries) rather than the selected indices themselves — confirm intent.
    my_measure = str(measures_listbox.get(0))
    for item in range(1, len(measures_listbox.curselection())):
        my_measure = str(measures_listbox.get(item)) + "," + my_measure

    # Build the DataFrame used by the pivot table
    df = joined_data(my_dimension_temps, my_measure)

    measures = my_measure.split(',')
    # Dimensions the user actually chose (the user may leave some empty),
    # always in the order time, node, spatial.
    dimensions = [
        dimension for dimension in (my_dimension_temps, my_dimension_node,
                                    my_dimension_spatiale) if dimension
    ]

    # False when the user chose no dimension at all
    booleen = bool(dimensions)
    if len(dimensions) == 3:
        final_data = df[flat(measures, dimensions)]
    elif booleen:
        # The column selector must be one flat list of labels; nesting the
        # measure list inside it (as before) is rejected by pandas.
        final_data = df[dimensions + measures]

    # Generate the pivot table page if at least one dimension was chosen
    if (booleen):
        pivot_ui(final_data,
                 # Only the chosen dimensions: an unchosen one is an empty
                 # string, not a column of final_data.
                 rows=dimensions,
                 exclusions={final_data.columns[0]: ["null"]},
                 outfile_path='cube.html')
        HTML('cube.html')
        # Open the page in the browser
        webbrowser.open("cube.html")
    # Otherwise show an error message
    else:
        error = Label(button_frame,
                      text="Il faut choisir des dimensions",
                      font=("Courrier", 14),
                      bg='#4065A4',
                      fg='red')
        error.pack()
    # Reset the geospatial filter input (list of points and the text that
    # displays the entered points)
    liste = []
    i = 0
    text = ""
    label.config(text=text)

Exemple #20

0

Afficher le fichier

import pandas as pd

# Load the data, forcing the "connection_hrs" column to float.
df = pd.read_csv("./tmp/tmp_all_data.csv").astype({"connection_hrs": float})
# The result of head() is discarded here; it only shows up when this runs as
# the last expression of a notebook cell.
df.head()
print(df)

from pivottablejs import pivot_ui

# Write the interactive pivot table page.
# NOTE(review): assumes ./output/reports/ already exists — confirm.
pivot_ui(df, outfile_path='./output/reports/!ipivottablejs.html')

Exemple #21

0

Afficher le fichier

Fichier : Corona analysis Scandinavia.ipynb.py Projet : stelaninja/corona_stats_scandinavia

bb = signature.get_bbox_patch()
bb.set_boxstyle("ext", pad=0.6, width=ax.get_xlim()[1])

middle = (ax.get_xlim()[0] + ax.get_xlim()[1]) / 2
ax.text(x = middle, y = ax.get_ylim()[0]-offset,
                    s = "DEATHS PER DAY", fontsize = 14, color = "#f0f0f0",
                    ha="center", backgroundcolor = "steelblue")
ax.text(x = ax.get_xlim()[1], y = ax.get_ylim()[0]-offset,
                    s = str2, fontsize = 14, color = "#f0f0f0",
                    ha="right", backgroundcolor = "steelblue")

# Save as jpg and show plot
fname = str(datetime.date.today()) + "_sweden_death_compare" + ".png"
plt.savefig(fname, format="png", dpi="figure", bbox_inches="tight", facecolor=fig.get_facecolor())
sns.despine()
plt.show()


import pandas_profiling
from pivottablejs import pivot_ui
from pydqc import distribution_compare_pretty


pandas_profiling.ProfileReport(scb_deaths).to_file("report.html")


pivot_ui(scb_deaths)

Exemple #22

0

Afficher le fichier

Fichier : TOU_Analysis(SQL).py Projet : EverybodylovesRaymondk/CodeStudies


# In[12]:


X=list(dict.fromkeys(TOU['DAY']))
Y=list(dict.fromkeys(TOU['MONTH']))


# In[13]:


# Write the per-day TOU report. The context manager closes the file even if
# a write or a lookup fails part-way through.
with open("TOU_Analysis_Output.TXT", "w+") as f:
    f.write('TOU Analysis Results: ({}/{} - {}/{})'.format(X[0], Y[0], X[-1], Y[-1]))
    f.write('\n' * 2)
    for i in X:
        # Positive-quantity rows of this day, split into the "Import Total"
        # rows and all remaining rows; each sum is computed once and reused.
        day_rows = TOU[(TOU.DAY == i) & (TOU.QTY > 0)]
        is_total = day_rows.NAME == 'Active power (P) Import Total (kW)-kWh'
        tou_kwh = sum(day_rows[~is_total]['QTY'])
        total_kwh = sum(day_rows[is_total]['QTY'])

        f.write('Day ' + str(i) + '\n')
        f.write('1)TOU consumption' + ' = ' + str(round(tou_kwh, 2)) + ' kWh' + '\n')
        f.write('2)Total consumption' + ' = ' + str(round(total_kwh, 2)) + ' kWh' + '\n')
        f.write('3)Difference = ' + str(round(total_kwh - tou_kwh, 2)) + ' kWh' + '\n')
        f.write('\n')


# In[14]:


pivot_ui(TOU,outfile_path='TOU_Analysis.html',width=0,height=0)

Exemple #23

0

Afficher le fichier

Fichier : pivot.py Projet : christophe-williams/rogue-shippers

def load_data(file):
	"""Load the object stored in *file* with joblib and return its pivot table UI."""
	# NOTE(review): joblib.load unpickles the file — only use it on trusted input.
	return pivot_ui(joblib.load(file))

Exemple #24

0

Afficher le fichier

Fichier : pivottable.py Projet : JonLoss55812/QuickDataSetReport

import pandas as pd
from pivottablejs import pivot_ui

# Load the data set and render it as an interactive pivot table.
# NOTE(review): the file name is spelled "placholder" — confirm it matches
# the file on disk.
df = pd.read_csv("data/placholder.csv")
pivot_ui(df)

Exemple #25

0

Afficher le fichier

    selectGraph = data.query('(YEAR == @selectYr) & (MONTH == @selectMth)')
    selectGraph = selectGraph.drop([
        'Unnamed: 0', 'WEEKNUMBER', 'CREATED_AT', 'LEVEL', 'AVER HOURLY SALARY'
    ],
                                   axis=1)
    selectGraph

# Appendices: pivot table and forecasting data
st.subheader('Appendices')

# Hide/Show pivot table for raw data
st.sidebar.subheader("Appendices")
if st.sidebar.checkbox("Show pivot table for the raw data", True):
    st.markdown("#### Pivot table for the raw data")
    dataPv = data.drop(['AVER HOURLY SALARY'], axis=1)
    pvTable = pivot_ui(dataPv)
    with open(pvTable.src) as t:
        components.html(t.read(), height=400, scrolling=True)

# Below is the machine learning time series forecasting for time enteries
dataPrd = data
dataPrd = pd.pivot_table(dataPrd,
                         index=['CREATED_AT'],
                         values='HOUR',
                         aggfunc='sum',
                         margins=True)
dataPrd = dataPrd[:-1]
dataPrd = pd.DataFrame(dataPrd.to_records())

dates = list(dataPrd['CREATED_AT'])
dates = list(pd.to_datetime(dates))

Exemple #26

0

Afficher le fichier

Fichier : main.py Projet : cazhu/Credit-Risk-Modeling-Using-Xgboost

    'emp_length',
    'fico_range_low(log)',
    'term',
    'int_rate',
    'funded_amnt(log)',
    'grade',
    'sub_grade',
    'annual_inc(log)']

# categorical variables
cate_list = ['addr_state',
             'purpose',
             'home_ownership']

# make pivot table
pivot_ui(df[['loan_status'] + [x if '(log)' not in x else x[:-5] for x in num_list + cate_list]],
         outfile_path="pivot_table/2017Q2LendingClub.html")

# make hist
plt.figure()
df[num_list + ['loan_status']
   ].hist(bins=50, figsize=(15, 15), edgecolor='white')
plt.savefig('figures/dist.png')
plt.gcf().clear()

for var_name in ['funded_amnt', 'annual_inc']:
    get_map(var_name, log=1, method='mean')

for var_name in ['loan_status', 'grade', 'emp_length']:
    get_map(var_name, log=0, method='mean')

# handling imbalanced data

Exemple #27

0

Afficher le fichier

Fichier : test.py Projet : renferpur/python-code

                                                   regex=True)
DimDf['Bundle_IND'] = DimDf['PFAMName'].str.contains('Bndl',
                                                     flags=re.IGNORECASE,
                                                     regex=True)

# In[ ]:

DimDf.head(20)

# In[ ]:

print(DimDf.dtypes)

# In[ ]:

pivot_ui(DimDf)

# In[ ]:

get_ipython().run_line_magic('matplotlib', 'inline')
msno.matrix(Df)

# In[288]:

sns.pairplot(Df)

# In[ ]:

sns.pairplot(Df, hue="Day")

# In[ ]:

Exemple #28

0

Afficher le fichier

Fichier : FMP_Finding Stocks.py Projet : junwei2110/Stock-Analysis

metrics = {}

for i in range(0, len(pricing)):

    for j in range(0, len(stock_list)):
        if stock_list[j] == pricing[i]['symbol']:
            metrics[stock_list[j]] = {}
            metrics[stock_list[j]]["Stock"] = pricing[i]["symbol"]
            metrics[stock_list[j]]["Price"] = pricing[i]["price"]
            metrics[stock_list[j]]["50-day Moving Average"] = pricing[i][
                "priceAvg50"]
            metrics[stock_list[j]]["200-day Moving Average"] = pricing[i][
                "priceAvg200"]
            metrics[stock_list[j]]["PE Ratio"] = pricing[i]["pe"]

            if pricing[i]["price"] is None:
                break
            else:
                metrics[stock_list[j]]["Delta from 50-Avg"] = pricing[i]["price"] - \
                    pricing[i]["priceAvg50"]
                metrics[stock_list[j]]["Delta from 200-Avg"] = pricing[i]["price"] - \
                    pricing[i]["priceAvg200"]

# Transform the dictionary into a Pandas
metrics_df = pd.DataFrame.from_dict(metrics, orient='index')

# Export to Excel or directly to pivot table
pivot_ui(metrics_df)
with pd.ExcelWriter('stock select.xlsx', mode='a') as writer:
    metrics_df.to_excel(writer, sheet_name='trial')

Exemple #29

0

Afficher le fichier

Fichier : data_viz-checkpoint.py Projet : stefanuddenberg/Princetonpy-Jupyter-Workshop

# Show information when hovering the mouse over datapoints
this_plot.add_tools(tools.HoverTool(tooltips=[("Day",
                                               "@day")]))  # @ chooses feature

# Hide all circles of a given category when clicked in legend
this_plot.legend.click_policy = "hide"

output_notebook()
show(this_plot)

# %% [markdown] {"slideshow": {"slide_type": "slide"}}
# # Pivot table plots

# %%
from pivottablejs import pivot_ui
pivot_ui(tips)

# %% [markdown] {"slideshow": {"slide_type": "slide"}}
# # Dash/Plotly
# [Dash/Plotly](https://plot.ly/products/dash/) is another package for producing really nice and interactive graphs, but it requires signing up for an account to initialize it. After initialization you can use it online by default (which means all of your graphs get saved to the cloud for everyone to see forever) or you can use it offline (as demoed below). Examples taken or modified from [here](https://plot.ly/python/ipython-notebook-tutorial/).
#
# I'm not familiar with the new Dash API that's been recently introduced, nor have I really explored using Plotly. I've been able to get everything that I need done in Matplotlib/Seaborn, so understand that the code snippets below may no longer work with recent versions of the Plotly package (which seems like a different thing to Dash).

# %% [markdown] {"slideshow": {"slide_type": "skip"}}
# ## Troubleshooting setup
# When I first tried using plotly I sometimes got `IOPub data rate exceeded` errors. Here's how you fix that:
#
# - run `jupyter notebook --generate-config` to generate a clean configuration file with all parameters commented out
# - modify `c.NotebookApp.iopub_data_rate_limit` and `c.NotebookApp.iopub_msg_rate_limit` to be some absurdly large numbers

# %% [markdown] {"slideshow": {"slide_type": "subslide"}}

Exemple #30

0

Afficher le fichier

#numbers4.rolling('8h', min_periods=1).sum()

grouped = numbers4.groupby(['WeekNo', 'TestTestedBy']).agg({
    'Total_Hrs': 'sum',
    'CEP_Hrs': 'sum',
    'UTM_Hrs': 'sum',
    'AdjustmentTime': 'sum',
    'hrs_lead': 'mean'
}).round().reset_index()
#grouped = numbers4.groupby(['WeekNo','TestTestedBy'])['Total_Hrs','CEP_Hrs','UTM_Hrs','AdjustmentTime'].sum().reset_index()

# In[ ]:

from pivottablejs import pivot_ui
pivot_ui(numbers4)

# # The results for number of hours completed

# In[3]:

grouped = grouped.sort_values(['WeekNo', 'Total_Hrs'],
                              ascending=[False, False]).reset_index()
#grouped = grouped.sort_values(['WeekNo','Total_Hrs'], ascending=[False,False]).reset_index()
grouped.index = grouped.index + 1
grouped.drop(['index'], axis=1, inplace=True)
grouped = grouped.drop(grouped.index[grouped.TestTestedBy == 0])
grouped = grouped.drop(grouped.index[grouped.TestTestedBy == 'barneje'])
grouped = grouped.drop(grouped.index[grouped.TestTestedBy == 'loncmm1'])
grouped = grouped.drop(grouped.index[grouped.TestTestedBy == 'lonutm1'])
grouped = grouped.drop(grouped.index[grouped.TestTestedBy == 'lonutm2'])

Exemple #31

0

Afficher le fichier

    def validate(ss, config):
        """Apply validation process using config input file."""
        source_read_df = (ss.read.format("csv").option("header", "true").load(
            config.source_df))
        comparable_dfs_list = [
            (t, ss.read.format("csv").option("header", "true").load(t))
            for t in config.comparable_dfs_list
        ]

        validator = DataframeValidator(
            spark=ss,
            source_df=source_read_df,
            id_col_name=config.id_col_name,
            correctness_rules_dict=config.correctness_rules_dict,
            parent_children_validation_pairs=config.
            parent_children_validation_pairs,
            completeness_rules_dic=config.completeness_rules_dic,
            comparable_dfs_list=comparable_dfs_list,
            unique_column_group_values_per_table=config.
            unique_column_group_values_per_table,
        )

        processed_df = validator.process()
        completeness_df = processed_df.limit(1).select(
            Constants.OVER_ALL_COUNT_COL,
            Constants.IS_ERROR_COL + Constants.OVER_ALL_COUNT_COL,
            Constants.DATE_TIME_REPORT_COL,
        )

        correctness_df = processed_df.drop(
            Constants.OVER_ALL_COUNT_COL,
            Constants.IS_ERROR_COL + Constants.OVER_ALL_COUNT_COL,
        )
        comparison_df = validator.compare()

        correctness_df.coalesce(1).write.mode("append").json(
            config.output_correctness_table)
        completeness_df.coalesce(1).write.mode("append").json(
            config.output_completeness_table)
        comparison_df.coalesce(1).write.mode("append").json(
            config.output_comparison_table)

        pd_correctness_df = ss.read.json(
            config.output_correctness_table).toPandas()
        pd_completeness_df = ss.read.json(
            config.output_completeness_table).toPandas()
        comparison_df = ss.read.json(config.output_comparison_table).toPandas()

        pivot_ui(
            pd_correctness_df,
            outfile_path="{}.html".format(config.output_correctness_table),
            menuLimit=5000,
            overwrite=True,
            rows=[config.id_col_name] + list(
                filter(
                    lambda x: Constants.IS_ERROR_COL in x and Constants.
                    SUM_REPORT_SUFFIX not in x and Constants.ROW_ERROR_SUFFIX
                    not in x,
                    pd_correctness_df.columns,
                )),
            cols=[Constants.DATE_TIME_REPORT_COL],
            vals=[Constants.IS_ERROR_COL + Constants.ROW_ERROR_SUFFIX],
            aggregatorName="Sum",
            rendererName="Table Barchart",
            rowOrder="value_z_to_a",
        )

        pivot_ui(
            pd_completeness_df,
            outfile_path="{}.html".format(config.output_completeness_table),
            menuLimit=5000,
            overwrite=True,
            rows=[Constants.OVER_ALL_COUNT_COL],
            cols=[Constants.DATE_TIME_REPORT_COL],
            vals=[Constants.IS_ERROR_COL + Constants.OVER_ALL_COUNT_COL],
            aggregatorName="Sum",
            rendererName="Table Barchart",
            rowOrder="value_z_to_a",
        )

        pivot_ui(
            comparison_df,
            outfile_path="{}.html".format(config.output_comparison_table),
            menuLimit=5000,
            overwrite=True,
            rows=list(
                filter(
                    lambda x: Constants.DATE_TIME_REPORT_COL not in x,
                    comparison_df.columns,
                )),
            cols=[Constants.DATE_TIME_REPORT_COL],
            rendererName="Table Barchart",
            rowOrder="value_z_to_a",
        )