Python Table.get_numerical Examples

Programming Language: Python

Namespace/Package Name: cognito.table

Class/Type: Table

Method/Function: get_numerical

Examples at hotexamples.com: 4

Python Table.get_numerical - 4 examples found. These are the top-rated real-world Python examples of cognito.table.Table.get_numerical, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Table(30)

columns(7)

get_categorical(6)

total_columns(4)

hot_encoder_categorical(4)

get_numerical(4)

even_rows(3)

ignore_cardinal(3)

odd_rows(3)

list_cardinal(3)

total_rows(3)

convert_to_bin(2)

generate(2)

fix_outlier_with_std_deviation(2)

slice(2)

summary(2)

imputer(2)

cardinal_columns(1)

correlation(1)

fix_missing(1)

numerical_columns(1)

save(1)

categorical_columns(1)

Example #1

Show file

File: test_modules.py Project: vandana-11/cognito

def test_table_get_numerical_3():
    """Check the columns of ``get_numerical()`` for the ``cereal.csv`` fixture."""
    # The fixture lives in the ``data`` directory next to this test module.
    fixture_path = os.path.join(
        os.path.dirname(__file__), 'data', 'cereal.csv')
    numerical_columns = list(Table(fixture_path).get_numerical().columns)
    assert numerical_columns == ['sodium', 'calories', 'sugars']

Example #2

Show file

File: test_modules.py Project: vandana-11/cognito

def test_table_get_numerical_1():
    """Check the columns of ``get_numerical()`` for the ``Freedman.csv`` fixture."""
    # The fixture lives in the ``data`` directory next to this test module.
    fixture_path = os.path.join(
        os.path.dirname(__file__), 'data', 'Freedman.csv')
    expected_columns = ['population', 'nonwhite', 'density', 'crime']
    numerical_columns = list(Table(fixture_path).get_numerical().columns)
    assert numerical_columns == expected_columns

Example #3

Show file

File: test_modules.py Project: vandana-11/cognito

def test_table_get_numerical_2():
    """Check the columns of ``get_numerical()`` for the ``msleep_ggplot.csv`` fixture."""
    # The fixture lives in the ``data`` directory next to this test module.
    fixture_path = os.path.join(
        os.path.dirname(__file__), 'data', 'msleep_ggplot.csv')
    expected_columns = [
        'sleep_total', 'sleep_rem', 'sleep_cycle', 'awake', 'brainwt', 'bodywt'
    ]
    numerical_columns = list(Table(fixture_path).get_numerical().columns)
    assert numerical_columns == expected_columns

Example #4

Show file

File: cli.py Project: NAnnamalai/cognito

def audit(inp, save):
    """Print an audit of the dataset at ``inp`` and optionally save it as HTML.

    Two tables are built and echoed with ``click.echo``: a dataset-level
    description (variable/observation counts, missing cells, duplicate rows,
    memory usage, categorical/continuous counts, elapsed time) and a
    per-column table with one row per feature.

    Args:
        inp: Input passed to ``Table``; when falsy the function does nothing.
        save: When truthy, the report is also rendered through the
            ``index.html`` template and written to ``save + '.html'``.

    A ``FileNotFoundError`` raised while building or saving the report is
    caught and logged as a warning instead of propagating.
    """

    if inp:
        start_time = datetime.now()
        description = PrettyTable(['Name', 'Values'])
        description.align['Name'] = "l"

        table = PrettyTable([
            'Features', 'Type', 'Value Type', 'Outliers', 'Missing',
            '(%) Missing', 'Distinct Count', 'Min', 'Mean', 'Max', 'Zeros',
            '(%) Zeros', 'Memory Size'
        ])
        try:
            df_raw = Table(inp)
            df = df_raw.data
            features = df.columns
            table.align["Features"] = "l"
            table.align["Value Type"] = "l"
            table.sortby = "Distinct Count"
            table.reversesort = True

            # Generate dynamic analytical stories
            stories = get_interesting_stories(df_raw)

            # Group identical rows; each group's size lands in column 0,
            # which is renamed to 'count'.
            dups = df.groupby(df.columns.tolist()).size().reset_index().rename(
                columns={0: 'count'})

            description.add_row(['Total variables', df.shape[1]])
            description.add_row(['Total Observations', df.shape[0]])
            description.add_row(['Missing Cells', df.isnull().sum().sum()])
            # Share of missing cells is relative to the total number of
            # cells (rows * columns), not to the number of rows.
            description.add_row(
                ['(%) Missing Cells',
                 df.isnull().sum().sum() / df.size])
            description.add_row(
                ['Duplicate Rows', dups['count'].sum() - dups.shape[0]])
            description.add_row([
                '(%) Duplicate Rows',
                (dups['count'].sum() - dups.shape[0]) / len(dups)
            ])
            description.add_row([
                'Total Size of Memory',
                str(df.memory_usage().sum() / 1000) + 'KiB'
            ])
            description.add_row(['🍋 Total Categorical', count_categorical(df)])
            description.add_row(
                ['🔟 Total Continuous',
                 len(df_raw.get_numerical().columns)])
            #description.add_row(['Started at', start_time.strftime("%d-%b-%y %H:%M:%S")])

            for col in tqdm(features, ascii=True, desc="Auditing.. : "):
                table.add_row([
                    col.strip(), df[col].dtypes,
                    type_of_variable(df[col]),
                    check_outlier(df[col]),
                    check_missing(df[col]),
                    column_missing_percentage(df[col]),
                    distinct_count(df[col]),
                    count_min(df[col]),
                    count_mean(df[col]),
                    count_max(df[col]), df[col].isin([0]).sum(),
                    # Fraction of zeros in this column is relative to the
                    # number of rows, not the number of columns.
                    round(df[col].isin([0]).sum() / len(df), 2),
                    str(df[col].memory_usage() / 1000) + 'KiB'
                ])

            end_time = datetime.now()
            #description.add_row(['Ended at', end_time.strftime("%d-%b-%y %H:%M:%S")])
            description.add_row(
                ['Time Elapsed', (end_time - start_time).total_seconds()])

            # Save the report into HTML
            if save:

                desc_html = description.get_html_string()
                description_summary = desc_html.replace(
                    '<table>', '<table class="table is-bordered">')

                report_html = table.get_html_string()
                report_html = report_html.replace(
                    '<table>', '<table class="table is-bordered">')

                stories_json = [{
                    'question': row['question'],
                    'answer': row['answer']
                } for row in stories]
                loader = template.Loader(
                    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'templates'))

                total_categorical = list(df_raw.get_categorical())
                total_continuous = list(df_raw.get_numerical())

                # NOTE: every local variable defined so far is handed to the
                # template, so the local names above are part of the
                # template's interface and must not be renamed.
                # Render before opening the file so a rendering failure does
                # not leave an empty report behind, and close the file
                # deterministically once written.
                rendered = loader.load("index.html").generate(**locals())
                with open(save + '.html', 'wb') as report_file:
                    report_file.write(rendered)
                click.echo('Report generated with name ' + save + '.html')

            click.echo(description)
            click.echo(table)

        except FileNotFoundError:
            logging.warning("Given input file doesn't exists")
            print(
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'templates'))