def detect_df_from_file(df_path, constraints_path, outpath, verbose=True, **kwargs):
    """
    Load a dataframe from a file (or standard input) and run detection on it.

    Parameters:
        df_path: path of the data file. ``None`` or ``'-'`` means the
            whole of standard input is read into a ``StringIO`` and that
            buffer is loaded instead.
        constraints_path: path of the constraints file. When ``None`` and
            ``df_path`` is a path ending in ``.csv`` or ``.feather``, the
            same path with a ``.tdda`` extension is used.
        outpath: forwarded to ``detect_df`` as ``outpath``.
        verbose: when true, the result is printed, but only if ``outpath``
            is neither ``None`` nor ``'-'``.
        **kwargs: forwarded unchanged to ``detect_df``.

    Returns:
        Whatever ``detect_df`` returns.

    Exits the process with status 1 (after a message on standard error)
    when no constraints path was given and none could be inferred.
    """
    if df_path is None or df_path == '-':
        df_path = StringIO(sys.stdin.read())

    # A constraints path can only be inferred from a real file name, never
    # from the in-memory buffer that stands in for standard input.
    if constraints_path is None and not isinstance(df_path, StringIO):
        stem, extension = os.path.splitext(df_path)
        if extension in ('.csv', '.feather'):
            constraints_path = stem + '.tdda'

    if constraints_path is None:
        print('No constraints file specified.', file=sys.stderr)
        sys.exit(1)

    df = load_df(df_path)
    is_feather = file_format(df_path) == 'feather'
    result = detect_df(
        df,
        constraints_path,
        outpath=outpath,
        rownumber_is_index=is_feather,
        **kwargs
    )

    # NOTE(review): the summary is skipped when outpath is None or '-',
    # presumably because detection output then goes to stdout -- confirm.
    has_named_output = outpath is not None and outpath != '-'
    if verbose and has_named_output:
        print(result)
    return result
def find_with_tdda(df, show=True):
    """
    Run detection on ``df`` against ``constraints.tdda`` and return the
    records it reports.

    Detection is requested with ``per_constraint=True`` and an empty
    ``output_fields`` list. The detected records are handed to ``show_df``
    under the title 'BAD RECORDS (FOUND WITH TDDA)', together with the
    ``show`` flag, before being returned.
    """
    detection = detect_df(
        df,
        'constraints.tdda',
        per_constraint=True,
        output_fields=[],
    )
    bad_records = detection.detected()
    show_df(bad_records, 'BAD RECORDS (FOUND WITH TDDA)', show, all_cols=True)
    return bad_records
def my_detect_df(df, TDDA_FILE, *args, **kwargs):
    """
    Run ``detect_df`` on ``df`` against ``TDDA_FILE`` and report the outcome.

    Extra positional and keyword arguments are forwarded to ``detect_df``.
    A confirmation line is printed when the result reports zero failures;
    otherwise a warning is printed followed by the result itself.
    Nothing is returned.
    """
    outcome = detect_df(df, TDDA_FILE, *args, **kwargs)
    if outcome.failures != 0:
        print('*** Unexpectedly failed to verify dataframe against constraints'
              ' in %s.\nSomething is wrong!' % TDDA_FILE)
        # NOTE(review): the detail print is taken to belong to the failure
        # branch only -- confirm against the original layout.
        print(outcome)
    else:
        print('Correctly verified dataframe against constraints in %s.'
              % TDDA_FILE)
def testDetectElements118rexToDataFrame(self):
    """
    Detect on elements118.csv using the elements92rex.tdda constraints.

    Expects 61 passes and 17 failures, and compares the string form of the
    detected dataframe (requested with 'Z' as the only output field)
    against the reference 'elements118rex_detect.df'.
    """
    data_file = os.path.join(TESTDATA_DIR, 'elements118.csv')
    rex_constraints = os.path.join(TESTDATA_DIR, 'elements92rex.tdda')

    elements = pd.read_csv(data_file)
    detection = detect_df(elements, rex_constraints, output_fields=['Z'])

    self.assertEqual(detection.passes, 61)
    self.assertEqual(detection.failures, 17)

    detected_text = detection.detected().to_string()
    self.assertStringCorrect(detected_text, 'elements118rex_detect.df')
# Run detection on the wrangled dataframe against its constraints file and
# record the outcome in ``outpath``: either the detected rows as CSV, or a
# one-line note saying that nothing was detected.
import os
import pandas as pd
import sys

from tdda.constraints.pd.constraints import detect_df  # NOQA

inpath = '../data/processed/wrangled_dataframe.csv'
constraint_path = ''.join([
    '../data/interim/constraints_initial_csvs/',
    'wrangled_dataframe_constraints.tdda'
])
outpath = '../data/interim/constraints_initial_csvs/wrangled_anomalies.tdda'

df = pd.read_csv(inpath, low_memory=False)
v = detect_df(df, constraint_path)
detection_df = v.detected()

# A DataFrame has no unambiguous truth value: ``if detection_df:`` raises
# ValueError as soon as a frame comes back. Test explicitly for "no frame"
# (assumed to be None -- TODO confirm against tdda) and "frame with no rows".
has_anomalies = detection_df is not None and not detection_df.empty

if has_anomalies:
    print(detection_df.to_string())
    # pandas asks for newline='' on file objects passed to to_csv, so that
    # universal-newline translation does not alter the row terminators.
    with open(outpath, 'w', newline='') as f:
        detection_df.to_csv(f)
else:
    message = f'No anomalies detected between {inpath} and {constraint_path}.'
    print(message)
    with open(outpath, 'w') as f:
        f.write(message)

if os.path.exists(outpath):
    print('Written %s successfully.' % outpath)
    sys.exit(0)
else:
    print('Failed to write %s.' % outpath, file=sys.stderr)
    # Report the failure through the exit status as well as the message.
    sys.exit(1)
# accounts_detect_25k_against_1k.py from __future__ import print_function import pandas as pd from tdda.constraints.pd.constraints import detect_df df = pd.read_csv('testdata/accounts25k.csv') print(detect_df(df, 'accounts1k.tdda', outpath='accounts25k_detect.csv', per_constraint=True, output_fields=[]))
# elements_detect_118_against_92.py from __future__ import print_function import pandas as pd from tdda.constraints.pd.constraints import detect_df df = pd.read_csv('testdata/elements118.csv') print( detect_df(df, 'elements92.tdda', outpath='elements118_detect.csv', per_constraint=True, output_fields=[]))
# elements_detect_118_against_92.py from __future__ import print_function import pandas as pd from tdda.constraints.pd.constraints import detect_df df = pd.read_csv('testdata/elements118.csv') print(detect_df(df, 'elements92.tdda', outpath='elements118_detect.csv', per_constraint=True, output_fields=[]))
def detect_df_from_file(df_path, constraints_path, verbose=True, **kwargs):
    """
    Load the dataframe at ``df_path`` with ``load_df`` and run ``detect_df``
    on it against ``constraints_path``.

    Any extra keyword arguments are forwarded to ``detect_df``. The result
    is printed when ``verbose`` is true, and is returned in either case.
    """
    detection = detect_df(load_df(df_path), constraints_path, **kwargs)
    if not verbose:
        return detection
    print(detection)
    return detection