def generatereport(self):
    """Profile ``self.dataset`` and save the result as ``AnalysisReport.html``.

    Prints ``yes`` once the report has been written. Any exception raised
    along the way is caught and its ``repr`` is printed instead of being
    propagated to the caller.
    """
    try:
        report = ProfileReport(self.dataset)
        report.to_file(output_file="AnalysisReport.html")
        print("yes")
    except Exception as exc:
        print(repr(exc))
def test_multiple_times(test_output_dir):
    """Check that writing the same non-inline report twice is stable.

    The report is exported with ``inline=False``; its assets are expected in
    ``cdn_multi_assets`` next to the HTML file. The number of exported images
    after the second ``to_file`` call must equal the number after the first.
    """
    n_rows = 10
    n_columns = 10

    df = pd.DataFrame(
        np.random.randint(0, 1000, size=(n_rows, n_columns)),
        columns=[f"column_{c}" for c in range(n_columns)],
    )

    profile = ProfileReport(
        df,
        minimal=True,
        html={"inline": False, "use_local_assets": False},
    )

    report = test_output_dir / "cdn_multi.html"
    assets_dir = test_output_dir / "cdn_multi_assets"
    # Count images in the assets directory that sits next to the report, not
    # in a path relative to the current working directory, so the comparison
    # actually looks at the exported files.
    images_dir = assets_dir / "images"

    profile.to_file(report)
    n_images = len(list(images_dir.glob("*")))
    assert report.exists()
    check_assets(assets_dir, "svg", n_css=1, n_js=1)

    profile.to_file(report)
    assert report.exists()
    assert n_images == len(list(images_dir.glob("*")))
Beispiel #3
0
def EDA(source_df, reply_df):
    """Run exploratory data analysis on the source and reply frames.

    For each frame a full-width profile report is built, shown as a notebook
    iframe and saved as HTML. Correlation heatmaps are then drawn for both
    frames and the two HTML reports are converted to PDF with ``pdfkit``.
    """
    jobs = (
        (source_df, "EDA_source_report.html", 'EDA_source_report.pdf'),
        (reply_df, "EDA_reply_report.html", 'EDA_reply_report.pdf'),
    )

    # Build, display and save one profile report per frame.
    for frame, html_name, _ in jobs:
        report = ProfileReport(frame,
                               title='Profile Report',
                               html={'style': {'full_width': True}})
        report.to_notebook_iframe()
        report.to_file(output_file=html_name)

    for frame, _, _ in jobs:
        correlation_heatmap(frame)

    # pdfkit is only imported once the HTML reports exist.
    import pdfkit
    for _, html_name, pdf_name in jobs:
        pdfkit.from_file(html_name, pdf_name)
Beispiel #4
0
def upload_csv():
    """Handle the CSV upload form.

    On POST the uploaded file is saved in the ``uploads`` folder next to this
    module, profiled with pandas-profiling and Sweetviz, both reports are
    opened in browser tabs and a JSON confirmation is returned. A POST whose
    file name is empty after sanitising gets a 400 JSON response. Any other
    method renders the upload page.
    """
    if request.method == "POST":

        upload = request.files["file"]

        # Sanitise the client-supplied name once and use it for every path,
        # so the CSV and its report both stay inside the uploads folder.
        safe_name = secure_filename(upload.filename or "")
        if not safe_name:
            return make_response(
                jsonify({"message": "No valid file name was provided."}), 400)

        # Save the file to ./uploads
        basepath = os.path.dirname(__file__)
        upload_dir = os.path.join(basepath, 'uploads')
        os.makedirs(upload_dir, exist_ok=True)
        file_path = os.path.join(upload_dir, safe_name)
        upload.save(file_path)

        # processing and generating eda report in uploads folder
        df = pd.read_csv(file_path)
        # The report is written next to the saved CSV; the very same path is
        # opened in the browser below.
        report_path = file_path + ".html"
        profile = ProfileReport(df, title=upload.filename, explorative=True)
        profile.to_file(report_path)
        sweetviz_eda = sv.analyze(df)

        res = make_response(
            jsonify({
                "message":
                "File " + upload.filename +
                " uploaded successfully and report is Ready. "
            }), 200)
        # showing both reports in new tabs
        webbrowser.open_new_tab(report_path)
        sweetviz_eda.show_html()
        return res

    # NOTE(review): "inex.html" looks like a typo for "index.html" — confirm
    # against the templates folder before changing it.
    return render_template("inex.html")
Beispiel #5
0
def generate_pandas_prof_report(df, title, explorative=True, dataset=None, minimal=True):
    """Build a pandas-profiling report for ``df`` and save it as HTML.

    The report is written to ``get_save_path() + title + '.html'``. When
    ``dataset`` is given, the ``ModulePandasProfiling`` record selected by
    ``dataset.id`` is set to ``loading`` before the work starts and to
    ``loaded`` (together with the report path) or ``failed`` afterwards.

    Args:
        df: Data handed to ``ProfileReport``.
        title: Report title; also used as the output file name.
        explorative: Forwarded to ``ProfileReport``.
        dataset: Optional object with an ``id`` attribute whose module record
            tracks the generation status.
        minimal: Forwarded to ``ProfileReport``.

    Errors raised while generating or saving the report are not propagated;
    with a ``dataset`` they are recorded as status ``failed``.
    """
    module = None
    if dataset is not None:
        module = select_from_db(ModulePandasProfiling,
                                'dataset_id', dataset.id)
        update_in_db(module, {'status': 'loading'})

    try:
        # Forward the caller's flag rather than a hard-coded value.
        profile = ProfileReport(
            df, title=title, minimal=minimal, explorative=explorative)

        output_path = get_save_path()
        output_path = output_path + title + '.html'

        profile.to_file(output_path)

        if dataset is not None:
            data = {'status': 'loaded', 'path': output_path}
            res = update_in_db(module, data)

            if res != 'updated':
                update_in_db(module, {'status': 'failed'})

    except Exception:
        # Best-effort by design: failures only show up in the stored status.
        # Catching Exception (not everything) lets KeyboardInterrupt and
        # SystemExit propagate.
        if dataset is not None:
            update_in_db(module, {'status': 'failed'})
Beispiel #6
0
def create_report(df, filename=None):
    """Build a profile report of ``df``.

    With a truthy ``filename`` the report is written to that file and nothing
    is returned; otherwise the result of ``to_notebook_iframe()`` is returned.
    """
    report = ProfileReport(df, title='Pandas Profiling Report')

    if not filename:
        return report.to_notebook_iframe()
    report.to_file(output_file=filename)
def result(request):
    """Train a logistic regression on the diabetes CSV and classify the request.

    The CSV is loaded and profiled into ``report.html``, a model is fitted on
    an 80/20 split with ``Outcome`` as target, and the eight query parameters
    ``n1`` .. ``n8`` are converted to floats and used as a single sample.
    ``predict.html`` is rendered with ``result2`` set to ``Positive`` or
    ``Negative``.
    """
    df = pd.read_csv(r'C:\Users\hp\Downloads\diabetes.csv')
    ProfileReport(df).to_file(output_file='report.html')

    features = df.drop('Outcome', axis=1)
    target = df['Outcome']
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, target, test_size=0.2)

    model = LogisticRegression()
    model.fit(X_train, Y_train)

    # Query parameters n1..n8, in that order, form the sample to classify.
    sample = [float(request.GET['n{}'.format(i)]) for i in range(1, 9)]

    pred = model.predict([sample])
    result1 = 'Positive' if pred == [1] else 'Negative'

    return render(request, 'predict.html', {'result2': result1})
def explore_df(df):
    """Profile ``df``, save the report as ``report.html`` and return its text."""
    report_file = "report.html"
    ProfileReport(df, title='DataFrame exploration').to_file(report_file)

    # Read the freshly written report back as UTF-8 text.
    with open(report_file, "r", encoding='utf-8') as handle:
        return handle.read()
Beispiel #9
0
async def testing(request: web.Request):
    """Profile an uploaded CSV file and return the report as an HTML page.

    The multipart field ``file`` is stored under ``<PROJECT_DIR>/testing/``,
    loaded with pandas and profiled. The report is written to
    ``your_report.html`` and its content is returned as the response body.
    Uploads whose name does not end in ``.csv`` are answered with an empty
    200 response.
    """
    r = await request.post()
    data = r['file']  # data is the uploaded file field

    projectName = "testing"
    project_dir = request.app['settings'].PROJECT_DIR + "/" + projectName
    os.makedirs(project_dir, exist_ok=True)

    # Drop any directory components from the client-supplied name so the
    # upload cannot be written outside the project directory.
    filename = os.path.basename(data.filename.replace("\\", "/"))

    # Check the extension, not just whether ".csv" occurs somewhere in the name.
    if not filename.lower().endswith(".csv"):
        return web.HTTPOk()

    # Write the uploaded CSV to disk
    csv_path = project_dir + "/" + filename
    with open(csv_path, 'wb') as file:
        # Read the whole upload; no Content-Length header is needed for this.
        file.write(data.file.read())
    df = pd.read_csv(csv_path)

    profile = ProfileReport(df,
                            title='Pandas Profiling Report',
                            html={'style': {
                                'full_width': True
                            }})

    profile.to_file(output_file="your_report.html")
    with open("your_report.html", "r", encoding='utf-8') as f:
        text = f.read()
    print(text)
    return web.Response(text=text, content_type='text/html')
Beispiel #10
0
def profile_file(file_path, file_name, extension, output_path = '.', sep = None):
    """
    Load the given file using pandas and create a report using pandas-profiling.

    Extensions listed in ``PLAIN_FORMATS`` are read with ``pd.read_csv`` and
    the report is named after ``file_name`` up to its first dot. For ``xlsx``
    the sheet called ``file_name`` is read and the report is named
    ``<workbook basename>_<file_name>``. Any other extension is ignored.

    The HTML report is written into ``output_path`` (created if missing).
    Failures are reported on stdout and never raised; the function always
    returns ``None``.
    """
    import os

    try:
        if extension in PLAIN_FORMATS:
            separator = get_separator_char(sep)
            df = pd.read_csv(file_path, sep = separator)
            report_stem = file_name.split('.')[0]
        elif extension == 'xlsx':
            excel_name = get_file_basename(file_path)
            report_stem = excel_name + '_' + file_name
            df = pd.read_excel(file_path, sheet_name=file_name)
        else:
            return

        profile = ProfileReport(df)
        # Honour the requested output directory; the default '.' keeps
        # reports in the current working directory.
        os.makedirs(output_path, exist_ok=True)
        report_name = os.path.join(output_path, '{}.html'.format(report_stem))
        profile.to_file(report_name)
    except Exception:
        # Catch Exception only, so KeyboardInterrupt/SystemExit still propagate.
        print("Can't open {}".format(file_path))
        return
def test_html_export_theme(test_output_dir):
    """Export a minimal, non-inline report with the "united" theme and check its assets."""
    shape = (10, 10)
    column_names = [f"column_{idx}" for idx in range(shape[1])]
    frame = pd.DataFrame(
        np.random.randint(0, 1000, size=shape),
        columns=column_names,
    )

    html_options = {"inline": False, "style": {"theme": "united"}}
    profile = ProfileReport(frame, minimal=True, html=html_options)

    report = test_output_dir / "united.html"
    profile.to_file(report)
    assert report.exists()

    # Non-inline export places assets in a sibling "<stem>_assets" folder.
    check_assets(test_output_dir / "united_assets", "svg", n_css=2, n_js=3)
Beispiel #12
0
 def analyze_profile(self):
     """
     Build a profile report of ``self.df`` titled ``self.title`` and
     write it to ``self.path_to_export``.
     """
     # Imported here so pandas_profiling is only needed when a report is made.
     from pandas_profiling import ProfileReport
     ProfileReport(self.df, title=self.title).to_file(self.path_to_export)
Beispiel #13
0
def feature_report(df):
    '''
    Build a profile report of every feature and open it in the web browser.

    The report is saved as ``output.html`` in the current working directory.
    '''
    from pathlib import Path

    profile = ProfileReport(df, bins=30)
    # Pass the target positionally: the keyword is ``outputfile`` in
    # pandas-profiling 1.x but ``output_file`` from 2.x on.
    profile.to_file("output.html")
    # as_uri() builds a well-formed, percent-encoded file:// URL, which plain
    # string concatenation does not do for Windows paths or paths with spaces.
    webbrowser.open(Path("output.html").resolve().as_uri())
def test_example(get_data_file, test_output_dir):
    """End-to-end example on the NASA meteorites data set.

    Downloads (or reuses) the CSV, adds a few synthetic columns and duplicate
    rows, profiles the frame with ascending variable sort and checks that the
    HTML file exists, that the description is a dict with 7 entries and that
    the variables are listed in ascending order.
    """
    file_name = get_data_file(
        "meteorites.csv",
        "https://data.nasa.gov/api/views/gh4g-9sfh/rows.csv?accessType=DOWNLOAD",
    )

    df = pd.read_csv(file_name)
    # Note: Pandas does not support dates before 1880, so we ignore these for this analysis
    df["year"] = pd.to_datetime(df["year"], errors="coerce")

    # Example: Constant variable
    df["source"] = "NASA"

    # Example: Boolean variable
    df["boolean"] = np.random.choice([True, False], df.shape[0])

    # Example: Mixed with base types
    df["mixed"] = np.random.choice([1, "A"], df.shape[0])

    # Example: Highly correlated variables
    df["reclat_city"] = df["reclat"] + np.random.normal(scale=5,
                                                        size=(len(df)))

    # Example: Duplicate observations
    # Work on an explicit copy so renaming does not touch rows of ``df``.
    duplicates_to_add = df.iloc[0:10].copy()
    duplicates_to_add["name"] += " copy"

    # DataFrame.append was removed in pandas 2.0; concat works on all versions.
    df = pd.concat([df, duplicates_to_add], ignore_index=True)

    output_file = test_output_dir / "profile.html"
    profile = ProfileReport(df,
                            title="NASA Meteorites",
                            samples={
                                "head": 5,
                                "tail": 5
                            },
                            sort="ascending")
    profile.to_file(output_file=output_file)
    assert (test_output_dir /
            "profile.html").exists(), "Output file does not exist"

    description = profile.get_description()
    assert (isinstance(description, dict)
            and len(description.items()) == 7), "Unexpected result"
    # Compare the full version tuple, not just the minor number.
    if sys.version_info >= (3, 6):
        assert list(description["variables"].keys()) == [
            "boolean",
            "fall",
            "GeoLocation",
            "id",
            "mass (g)",
            "mixed",
            "name",
            "nametype",
            "recclass",
            "reclat",
            "reclat_city",
            "reclong",
            "source",
            "year",
        ], "Ascending sort did not work"
Beispiel #15
0
 def save_report(df, name_report):
     """Write a profile report of ``df`` to ``<RAPPORT_DIR>/<name_report>.html``.

     Nothing is generated when that file already exists; a notice is printed
     instead.
     """
     if path.exists(os.path.join(Helper.RAPPORT_DIR, f'{name_report}.html')):
         print('File exists !')
         return

     target = os.path.join(Helper.RAPPORT_DIR, f'{name_report}.html')
     ProfileReport(df).to_file(output_file=target)
Beispiel #16
0
def profile_report(dfToReport):
    """Write a full-width profile report of ``dfToReport`` to ``output.html``.

    The frame is indexed by its ``date_time`` column for the report. The
    caller's DataFrame is left untouched, so the function can be called
    repeatedly with the same object.

    Raises:
        KeyError: if ``dfToReport`` has no ``date_time`` column.
    """
    # Without inplace=True, set_index returns a new frame instead of
    # mutating the argument.
    indexed = dfToReport.set_index('date_time')
    profile = ProfileReport(indexed,
                            title='Pandas Profiling Report',
                            html={'style': {
                                'full_width': True
                            }})
    profile.to_file(output_file='output.html')
Beispiel #17
0
def main() -> None:
    """Command-line entry point: profile a CSV file into an HTML report.

    Expects exactly three arguments (title, input CSV, output HTML). With any
    other argument count a usage line is printed and nothing else happens.
    """
    if len(sys.argv) == 4:
        _, title, csv_path, html_path = sys.argv
        frame = pd.read_csv(csv_path)
        ProfileReport(frame, title=title).to_file(html_path)
    else:
        print("Usage: python3 analyze.py title input.csv output.html")
def make_report(params: Params):
    """Create the EDA profile report for the training data.

    Reads the CSV at ``params.train_data_path`` and writes the report to
    ``params.report_path``, logging the start and the completion.
    """
    logger.info("EDA report preparation started")

    train_frame = pd.read_csv(params.train_data_path)
    ProfileReport(train_frame).to_file(output_file=params.report_path)

    logger.info("EDA report preparation completed")
Beispiel #19
0
def profiling(df: pd.DataFrame, sheet_name: str) -> None:
    """
    Write the profile of ``df`` to ``notebooks/full_profiling_<sheet_name>.html``.
    """
    from pandas_profiling import ProfileReport
    report = ProfileReport(df)
    target = 'notebooks/full_profiling_{}.html'.format(sheet_name)
    report.to_file(output_file=target)
Beispiel #20
0
def data_scan(df, to_html=False):
    """
    Build a full-width profile report of ``df``.

    When ``to_html`` is truthy the report is saved as ``data_scan.html`` and
    nothing is returned; otherwise the report object itself is returned.

    doc : https://github.com/pandas-profiling/pandas-profiling
    """
    html_options = {'style': {'full_width': True}}
    data_report = ProfileReport(df, title='Pandas Profiling Report', html=html_options)
    if not to_html:
        return data_report
    data_report.to_file(output_file="data_scan.html")
Beispiel #21
0
def generate_pandas_prof_report(df, title):
    """Profile ``df`` with ``explorative=False`` and save the report as HTML.

    The destination is ``get_save_path()`` followed directly by ``title`` and
    the ``.html`` extension.
    """
    report = ProfileReport(df, title=title, explorative=False)

    destination = get_save_path() + title + '.html'
    report.to_file(destination)
Beispiel #22
0
def generate_html_profiling(filename, report_name):
    """
    Load ``filename`` with ``open_data``, build an explorative profiling
    report titled after the file and save it to ``report_name``.
    """
    frame = open_data(filename)
    report_title = 'Profiling Report of {}'.format(filename)
    ProfileReport(frame, title=report_title, explorative=True).to_file(report_name)
Beispiel #23
0
def show_graph():
    """Profile ``graph_input_files/graph_data.csv`` and render the result.

    On POST the CSV is profiled, the report is written into the ``templates``
    folder as ``bulk_graph_output.html`` and that template is rendered.
    Any other method gets a 405 response instead of falling through with
    ``None``.

    NOTE(review): assumes a Flask view (``request`` / ``render_template``),
    where a ``(body, status)`` tuple is a valid return value — confirm.
    """
    import os

    if request.method != 'POST':
        return 'Method Not Allowed', 405

    # os.path.join instead of hard-coded backslashes, so the paths also
    # resolve on non-Windows systems.
    graph_data = pd.read_csv(os.path.join('graph_input_files', 'graph_data.csv'))
    prof = ProfileReport(graph_data)
    prof.to_file(output_file=os.path.join('templates', 'bulk_graph_output.html'))
    return render_template('bulk_graph_output.html')
Beispiel #24
0
def generate_report(config: ReportConfig):
    """Profile the CSV named in ``config`` and save the report.

    ``config.input_path`` and ``config.output_path`` are made absolute with
    ``to_absolute_path``. Before saving, ``check_dir`` is called on the
    directory part of the output path.
    """
    out_path = to_absolute_path(config.output_path)
    in_path = to_absolute_path(config.input_path)

    frame = pd.read_csv(in_path)
    report = ProfileReport(frame, title="Profiling Report", explorative=True)

    logger.info("Save report to %s", out_path)
    out_dir = os.path.dirname(out_path)
    check_dir(out_dir)
    report.to_file(str(out_path))
Beispiel #25
0
    def generate_profile(self):
        """Profile the CSV at ``self._filename`` and save the report.

        The report is written to ``self.PROFILING_ARTIFACT`` inside
        ``self._dst_folder``.
        """
        frame = pd.read_csv(self._filename)
        report = ProfileReport(
            frame, title=f'{self._filename} profile', explorative=True)

        destination = os.path.join(self._dst_folder, self.PROFILING_ARTIFACT)
        report.to_file(destination)
Beispiel #26
0
def generate_incorrect_report(incorrect_list, columns):
    """Profile the incorrect predictions and save an HTML report.

    Every tuple found in the nested ``incorrect_list`` contributes one row,
    built by horizontally stacking its elements ``[2]``, ``[0]`` and ``[-1]``.
    The rows are labelled with ``columns`` plus a trailing ``"p"`` column and
    the report goes to ``assets/wip/report_incorrect.html``.
    """
    # Flatten the nested list into one row per tuple.
    rows = []
    for group in incorrect_list:
        for item in group:
            rows.append(np.hstack([item[2], item[0], item[-1]]))
    table = np.vstack(rows)

    header = columns.tolist() + ["p"]
    incorrect_df = pd.DataFrame(table, columns=header)
    ProfileReport(incorrect_df, explorative=True).to_file(
        "assets/wip/report_incorrect.html")
Beispiel #27
0
 def make_profile(self, db_info):
     """Profile one SQLite table and return the path of the written report.

     ``db_info`` supplies ``db_path``, ``table_name`` and ``profile_path``.
     The database is opened through an ``immutable=1`` URI, the whole table
     is loaded into a DataFrame and profiled; the minimal configuration file
     is used when ``app.config['PANDAS_PROFILING_CONFIG_MIN']`` is truthy.

     Returns:
         ``Path(db_info['profile_path'])``.
     """
     dsn = 'file:{}?immutable=1'.format(db_info['db_path'])
     # uri=True makes sqlite3 parse the string as a URI (and honour the
     # immutable flag) rather than treat it as a literal file name.
     conn = sqlite3.connect(dsn, uri=True)
     try:
         # NOTE(review): table_name is interpolated into the SQL text; this
         # assumes it comes from a trusted source — confirm with the caller.
         sql = 'SELECT * FROM [{}]'.format(db_info['table_name'])
         df = pd.read_sql(sql, con=conn)
     finally:
         # Always release the connection, even when the query fails.
         conn.close()
     if (app.config['PANDAS_PROFILING_CONFIG_MIN']):
         profile = ProfileReport(df, config_file="profiling-minimal.yml")
     else:
         profile = ProfileReport(df)
     profile.to_file(db_info['profile_path'])
     return Path(db_info['profile_path'])
Beispiel #28
0
def profiling(csv_source: str, report_title: str, profile_out: str):
    """Profile a CSV file and store the resulting report.

    Args:
        csv_source (str): Path of the CSV file to read.
        report_title (str): Title given to the report.
        profile_out (str): Path of the file the report is written to.
    """
    frame = pd.read_csv(csv_source)
    ProfileReport(frame, title=report_title).to_file(profile_out)
def create_pandas_profiling_report(df, df_name):
    """Create a minimal profiling report of a DataFrame and save it as HTML.

    The file is written to ``../data/4_data_analysis/<df_name>_report.html``
    relative to the current working directory, and a confirmation is printed.

    Args:
        df ([Pandas Dataframe]): Dataframe to analyze.
        df_name ([str]): Dataframe name, used in the title and the file name.
    """
    report_title = df_name + ' Report'
    report = ProfileReport(df, title=report_title, minimal=True)

    destination = os.getcwd() + '/../data/4_data_analysis/' + df_name + '_report.html'
    report.to_file(destination)
    print(f'\nPandas profiling report of file {df_name} created\n\n')
Beispiel #30
0
 def fit(df_path, report_path):
     """
     Generate a profiling report from a CSV file.

     :param df_path: path of the CSV file to load
     :param report_path: output path without extension; ".html" is appended
     :returns: None
     """
     df = pd.read_csv(df_path)
     profile = ProfileReport(
         df, title="Pandas Profiling Report Before prepocessing")
     output_file = report_path + ".html"
     profile.to_file(output_file)
     # Announce the report only after it has actually been written.
     print("report generated on " + output_file)