예제 #1
0
파일: detect.py 프로젝트: simonbrownsb/tdda
def detect_df_from_file(df_path,
                        constraints_path,
                        outpath,
                        verbose=True,
                        **kwargs):
    """
    Load a dataframe from a file (or stdin) and run detection on it.

    Parameters:
        df_path:          Path of the data file to load. If it is '-' or
                          None, the whole of standard input is read and
                          wrapped in a StringIO instead.
        constraints_path: Path of the constraints file. If None and the
                          data path ends in '.csv' or '.feather', the
                          same path with a '.tdda' extension is used.
        outpath:          Passed through to detect_df as ``outpath``.
        verbose:          If true, print the detection result, but only
                          when outpath is neither None nor '-'.
        **kwargs:         Passed through unchanged to detect_df.

    Returns the object returned by detect_df.

    Prints a message to stderr and exits with status 1 if no constraints
    path was given and none could be derived from the data path.
    """
    if df_path is None or df_path == '-':
        df_path = StringIO(sys.stdin.read())

    # Only a real path (not the stdin buffer) can suggest a default
    # constraints file name.
    if constraints_path is None and not isinstance(df_path, StringIO):
        stem, extension = os.path.splitext(df_path)
        if extension in ('.csv', '.feather'):
            constraints_path = stem + '.tdda'
    if constraints_path is None:
        print('No constraints file specified.', file=sys.stderr)
        sys.exit(1)

    frame = load_df(df_path)
    is_feather = file_format(df_path) == 'feather'
    detection = detect_df(frame,
                          constraints_path,
                          outpath=outpath,
                          rownumber_is_index=is_feather,
                          **kwargs)

    has_real_outpath = outpath is not None and outpath != '-'
    if verbose and has_real_outpath:
        print(detection)
    return detection
예제 #2
0
def find_with_tdda(df, show=True):
    """
    Detect records in df that fail the constraints in 'constraints.tdda'.

    Runs detect_df with per_constraint=True and an empty output_fields
    list, hands the detected records to show_df (together with the
    ``show`` flag) and returns those detected records.
    """
    detection_options = {'per_constraint': True, 'output_fields': []}
    detection = detect_df(df, 'constraints.tdda', **detection_options)
    bad_records = detection.detected()
    show_df(bad_records,
            'BAD RECORDS (FOUND WITH TDDA)',
            show,
            all_cols=True)
    return bad_records
예제 #3
0
def my_detect_df(df, TDDA_FILE, *args, **kwargs):
    """
    Run detect_df on df against TDDA_FILE and report the outcome.

    Extra positional and keyword arguments are forwarded to detect_df.
    Prints a success message when the result reports zero failures;
    otherwise prints a warning followed by the result itself.
    Returns nothing.
    """
    result = detect_df(df, TDDA_FILE, *args, **kwargs)
    if v_failed(result):
        print('*** Unexpectedly failed to verify dataframe against constraints'
              ' in %s.\nSomething is wrong!' % TDDA_FILE)
        print(result)
        return
    print('Correctly verified dataframe against constraints in %s.' %
          TDDA_FILE)


def v_failed(result):
    """Return True unless the detection result reports zero failures."""
    return not (result.failures == 0)
예제 #4
0
 def testDetectElements118rexToDataFrame(self):
     # Detect the 118-element data against the 92-element regex
     # constraints, keeping only field 'Z' in the output, then check the
     # pass/failure counts and the rendered detection dataframe.
     constraints_path = os.path.join(TESTDATA_DIR, 'elements92rex.tdda')
     elements = pd.read_csv(os.path.join(TESTDATA_DIR, 'elements118.csv'))
     detection = detect_df(elements, constraints_path, output_fields=['Z'])
     self.assertEqual(detection.passes, 61)
     self.assertEqual(detection.failures, 17)
     detected_text = detection.detected().to_string()
     self.assertStringCorrect(detected_text, 'elements118rex_detect.df')
예제 #5
0
파일: detect.py 프로젝트: tdda/tdda
def detect_df_from_file(df_path, constraints_path, outpath,
                        verbose=True, **kwargs):
    """
    Run detect_df on data loaded from df_path (or from standard input).

    A df_path of '-' or None means "read all of stdin into a StringIO".
    When constraints_path is None, a '.csv' or '.feather' data path is
    mapped to the same name with a '.tdda' extension; if that is not
    possible, an error is printed to stderr and the process exits with
    status 1.

    outpath and any extra keyword arguments are forwarded to detect_df.
    The result is printed only if verbose is true and outpath is neither
    None nor '-'. The result of detect_df is returned.
    """
    from_stdin = df_path is None or df_path == '-'
    if from_stdin:
        df_path = StringIO(sys.stdin.read())

    if constraints_path is None:
        # A default constraints name can only come from a real file path.
        if not isinstance(df_path, StringIO):
            base, ext = os.path.splitext(df_path)
            constraints_path = (base + '.tdda'
                                if ext in ('.csv', '.feather') else None)
        if constraints_path is None:
            print('No constraints file specified.', file=sys.stderr)
            sys.exit(1)

    data = load_df(df_path)
    detect_args = dict(kwargs)
    detect_args['outpath'] = outpath
    detect_args['rownumber_is_index'] = file_format(df_path) == 'feather'
    outcome = detect_df(data, constraints_path, **detect_args)

    if verbose and not (outpath is None or outpath == '-'):
        print(outcome)
    return outcome
import os
import pandas as pd
import sys

from tdda.constraints.pd.constraints import detect_df  # NOQA

# Input data, the constraints file to check it against, and the file in
# which the outcome of the detection is reported.
inpath = '../data/processed/wrangled_dataframe.csv'
constraint_path = ''.join([
    '../data/interim/constraints_initial_csvs/',
    'wrangled_dataframe_constraints.tdda'
])
outpath = '../data/interim/constraints_initial_csvs/wrangled_anomalies.tdda'

df = pd.read_csv(inpath, low_memory=False)
v = detect_df(df, constraint_path)
detection_df = v.detected()

# A pandas DataFrame has no unambiguous truth value: `if detection_df:`
# raises ValueError. Test explicitly for "no frame" and "no rows" instead.
# NOTE(review): the None guard assumes detected() may return None when
# nothing was detected -- confirm against tdda's API.
if detection_df is not None and not detection_df.empty:
    print(detection_df.to_string())
    with open(outpath, 'w') as f:
        detection_df.to_csv(f)
else:
    message = f'No anomalies detected between {inpath} and {constraint_path}.'
    print(message)
    with open(outpath, 'w') as f:
        f.write(message)

if os.path.exists(outpath):
    print('Written %s successfully.' % outpath)
    sys.exit(0)
else:
    print('Failed to write %s.' % outpath, file=sys.stderr)
    # Report the failure through the exit status as well as on stderr.
    sys.exit(1)
예제 #7
0
# accounts_detect_25k_against_1k.py

from __future__ import print_function
import pandas as pd

from tdda.constraints.pd.constraints import detect_df

# Load the accounts data to be checked.
df = pd.read_csv('testdata/accounts25k.csv')

# Run detection against the constraints in accounts1k.tdda, asking for
# per-constraint output with no extra output fields, then print the result.
detection = detect_df(df,
                      'accounts1k.tdda',
                      outpath='accounts25k_detect.csv',
                      per_constraint=True,
                      output_fields=[])
print(detection)

# elements_detect_118_against_92.py

from __future__ import print_function
import pandas as pd

from tdda.constraints.pd.constraints import detect_df

# Load the elements data to be checked.
df = pd.read_csv('testdata/elements118.csv')

# Run detection against the constraints in elements92.tdda, asking for
# per-constraint output with no extra output fields, then print the result.
detect_options = {
    'outpath': 'elements118_detect.csv',
    'per_constraint': True,
    'output_fields': [],
}
print(detect_df(df, 'elements92.tdda', **detect_options))
예제 #9
0
# elements_detect_118_against_92.py

from __future__ import print_function
import pandas as pd

from tdda.constraints.pd.constraints import detect_df

# Load the elements data to be checked.
df = pd.read_csv('testdata/elements118.csv')

# Run detection against the constraints in elements92.tdda, asking for
# per-constraint output with no extra output fields, then print the result.
detection = detect_df(
    df,
    'elements92.tdda',
    outpath='elements118_detect.csv',
    per_constraint=True,
    output_fields=[])
print(detection)

예제 #10
0
파일: detect.py 프로젝트: synapticlee/tdda
def detect_df_from_file(df_path, constraints_path, verbose=True, **kwargs):
    """
    Load a dataframe from df_path and run detect_df on it.

    The dataframe is loaded with load_df; constraints_path and any extra
    keyword arguments are forwarded to detect_df. If verbose is true the
    result is printed. The result of detect_df is returned.
    """
    detection = detect_df(load_df(df_path), constraints_path, **kwargs)
    if not verbose:
        return detection
    print(detection)
    return detection