Exemple #1
0
 def __init__(self, data: pd.DataFrame):
     """Build and store the Facets Overview statistics proto for ``data``.

     Per the original author's note, the proto carries the inputs to the
     Facets Overview plots: histogram bins, mean, min, median and max for
     numeric variables; unique counts, per-category counts and the top
     category for categorical variables.

     Args:
         data: Dataframe to summarise; registered under the name 'data'.
     """
     tables = [{'name': 'data', 'table': data}]
     self._proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(
         tables)
Exemple #2
0
 def _display_overview(self, data, update=None):
     """Display the Facets Overview of ``data``, or push new data to one.

     Args:
         data: Dataframe handed to the statistics generator as table 'data'.
         update: When truthy, it is passed as ``display_id`` to the script
             template and the result is emitted as JavaScript. Otherwise an
             HTML overview is displayed using ``self._overview_display_id``.
     """
     stats = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(
         [{'name': 'data', 'table': data}])
     # Both templates take the serialized proto as base64 text.
     protostr = base64.b64encode(stats.SerializeToString()).decode('utf-8')

     if not update:
         markup = _OVERVIEW_HTML_TEMPLATE.format(
             display_id=self._overview_display_id, protostr=protostr)
         display(HTML(markup))
         return

     refresh_js = _OVERVIEW_SCRIPT_TEMPLATE.format(display_id=update,
                                                   protostr=protostr)
     display_javascript(Javascript(refresh_js))
Exemple #3
0
  def _display_overview(self, data, update=None):
    """Display the Facets Overview of ``data``, or push new data to one.

    When ``self._include_window_info`` is set and the non-empty dataframe
    contains all of 'event_time', 'windows' and 'pane_info', those columns
    are dropped before the statistics are computed.

    Args:
      data: Dataframe handed to the statistics generator as table 'data'.
      update: When truthy, its ``_overview_display_id`` is passed to the
        script template and the result is emitted as JavaScript. Otherwise
        an HTML overview is displayed using ``self._overview_display_id``.
    """
    window_columns = ['event_time', 'windows', 'pane_info']
    drop_window_columns = (
        not data.empty and self._include_window_info and
        all(name in data.columns for name in window_columns))
    if drop_window_columns:
      data = data.drop(window_columns, axis=1)

    stats = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(
        [{'name': 'data', 'table': data}])
    # Both templates take the serialized proto as base64 text.
    protostr = base64.b64encode(stats.SerializeToString()).decode('utf-8')

    if not update:
      markup = _OVERVIEW_HTML_TEMPLATE.format(
          display_id=self._overview_display_id, protostr=protostr)
      display(HTML(markup))
      return

    refresh_js = _OVERVIEW_SCRIPT_TEMPLATE.format(
        display_id=update._overview_display_id, protostr=protostr)
    display_javascript(Javascript(refresh_js))
Exemple #4
0
def overview(tables: typing.Union[pandas.DataFrame, typing.Mapping[str, pandas.DataFrame]]) -> HTML:
    """Render one or more dataframes as a Facets Overview HTML object.

    Args:
        tables: Either a single dataframe, which is shown under the name
            "default", or a mapping from display name to dataframe.

    Returns:
        An ``HTML`` object built from ``FACETS_OVERVIEW_TEMPLATE`` with a
        freshly generated element id and the base64-encoded statistics proto.
    """
    # Element ID MUST be unique
    elem_id = _generate_element_id()

    if isinstance(tables, pandas.DataFrame):
        tables = {"default": tables}

    table_list = []
    for name, table in tables.items():
        # Work on a copy so the caller's dataframe is left untouched.
        view = table.copy()
        # Convert pandas extension-dtype columns to object columns because
        # facets currently doesn't support them.
        # ``Series.items()`` is used because ``Series.iteritems()`` was
        # removed in pandas 2.0.
        for column, dtype in view.dtypes.items():
            if not isinstance(dtype, numpy.dtype):
                view[column] = view[column].astype(object)

        table_list.append({'name': name, 'table': view})

    proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(table_list)
    proto_str = base64.b64encode(proto.SerializeToString()).decode("utf-8")
    return HTML(FACETS_OVERVIEW_TEMPLATE.format(elem_id=elem_id, proto_str=proto_str))
Exemple #5
0
def generate_facets(config, df):
    """Return an HTML snippet that embeds a Facets Overview of ``df``.

    Args:
        config: Not used by this function.
        df: Dataframe to summarise; registered under the name 'facets-iss'.

    Returns:
        The HTML template with the base64-encoded statistics proto
        substituted for ``{protostr}``.
    """
    tables = [{'name': 'facets-iss', 'table': df}]
    stats = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(tables)
    encoded = base64.b64encode(stats.SerializeToString()).decode("utf-8")

    page_template = """
            <script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
            <link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html" >
            <facets-overview id="elem"></facets-overview>
            <script>
            document.querySelector("#elem").protoInput = "{protostr}";
            </script>"""
    return page_template.format(protostr=encoded)
Exemple #6
0
    def generate_html(self, datasets: List[Dict[Text, pd.DataFrame]]) -> str:
        """Build the facets statistics page for the given datasets.

        Args:
            datasets: List of dicts of dataframes, passed unchanged to
                ``ProtoFromDataFrames``.

        Returns:
            The contents of the ``stats.html`` file located next to this
            module, with every occurrence of "protostr" replaced by the
            base64-encoded statistics proto.
        """
        stats = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(datasets)
        encoded = base64.b64encode(stats.SerializeToString()).decode("utf-8")

        # The template lives in the same directory as this source file.
        module_dir = os.path.abspath(os.path.dirname(__file__))
        template_path = os.path.join(module_dir, "stats.html")
        page = fileio.read_file_contents_as_string(template_path)

        return page.replace("protostr", encoded)
Exemple #7
0
def tables1():
    """Save the uploaded CSV and render its Facets Overview statistics.

    Reads the upload from the ``file`` field of the current request, stores
    it in the upload directory, loads it with pandas and passes the
    base64-encoded statistics proto to ``examples/tables.html`` as ``data``.

    Returns:
        The result of ``render_template``.
    """
    from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator
    import base64

    # NOTE(review): the second component is an absolute path, so on POSIX
    # os.path.join discards APP_ROOT and the uploads land in this hard-coded
    # directory. Kept as-is to preserve behaviour; confirm the intended path.
    target = os.path.join(APP_ROOT, "/home/aayushi/ml-simu")

    # Creates missing parents and tolerates the directory already existing,
    # which avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(target, exist_ok=True)

    upload = request.files["file"]
    # The client controls the filename: keep only its final path component so
    # values such as "../../x" or "/etc/x" cannot escape the upload directory.
    # A dedicated sanitiser (e.g. werkzeug's secure_filename) is stricter.
    filename = os.path.basename(upload.filename)
    destination = os.path.join(target, filename)
    print(destination)
    upload.save(destination)
    data = pd.read_csv(destination)

    gfsg = GenericFeatureStatisticsGenerator()
    proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': data}])
    protostr1 = base64.b64encode(proto.SerializeToString()).decode("utf-8")

    return render_template("examples/tables.html", data=protostr1)
 def setUp(self):
     """Create the statistics generator exposed as ``self.gfsg``."""
     generator = GenericFeatureStatisticsGenerator()
     self.gfsg = generator
Exemple #9
0
# -*- coding: utf-8 -*-
import pandas as pd
import dash
import dash_html_components as html
import base64
from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator


# Markup loaded inside the iframe; "{protostr}" is filled in further down.
_FACETS_SRC_DOC = """
       <script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
        <link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html" >
        <facets-overview id="elem"></facets-overview>
        <script>
          document.querySelector("#elem").protoInput = "{protostr}";
        </script>"""

DEBUG = True
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# Compute the statistics proto for the dataset and encode it as base64 text.
data = pd.read_csv("dataset.csv")
gfsg = GenericFeatureStatisticsGenerator()
proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': data}])
protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")

app = dash.Dash('')

# The whole page is a single iframe whose document embeds the proto.
_facets_frame = html.Iframe(
    width="1200",
    height="800",
    srcDoc=_FACETS_SRC_DOC.format(protostr=protostr),
)
app.layout = html.Div(children=[_facets_frame])
server = app.server
Exemple #10
0
#%%

# Fit an imputer followed by a random-forest regressor on the training
# split, then report the mean absolute error on the test split.
my_pipeline = make_pipeline(Imputer(), RandomForestRegressor())
my_pipeline.fit(train_X, train_y)
predictions = my_pipeline.predict(test_X)
print("Error:{!s}".format(mean_absolute_error(predictions, test_y)))

#%%
# Load both CSV splits.
train = pd.read_csv('ML/train.csv')
test = pd.read_csv('ML/test.csv')

from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator

#%%
# Compute the statistics proto for the test split, registered as 'test'.
proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(
    [{'name': 'test', 'table': test}])

#%%
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display is a deprecated path.
from IPython.display import display, HTML
import base64

# The template receives the serialized proto as base64 text.
protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")
HTML_TEMPLATE = """<link rel="import" href="/nbextensions/facets-dist/facets-jupyter.html" >
        <facets-overview id="elem"></facets-overview>
        <script>
          document.querySelector("#elem").protoInput = "{protostr}";
        </script>"""
html = HTML_TEMPLATE.format(protostr=protostr)
display(HTML(html))

#%%