Beispiel #1
0
 def constraintsGenerationTest(self, inc_rex=False):
     """
     Discover constraints for elements92.csv and compare them, field by
     field, with the stored reference constraints file.

     The reference file is elements92.tdda, or elements92rex.tdda when
     inc_rex is true; inc_rex is also forwarded to discover_df.

     Comparison rules for each constraint value in the reference:
       - floats are compared with assertAlmostEqual to 4 places;
       - lists are compared as sets, so order is ignored;
       - 'int' and 'real' are treated as interchangeable;
       - anything else must be equal.
     """
     df = pd.read_csv(os.path.join(TESTDATA_DIR, 'elements92.csv'))

     suffix = 'rex' if inc_rex else ''
     ref_constraints_path = os.path.join(TESTDATA_DIR,
                                         'elements92%s.tdda' % suffix)
     with open(ref_constraints_path) as ref_file:
         ref_text = ref_file.read()
     ref = native_definite(json.loads(ref_text))

     discovered_json = discover_df(df, inc_rex=inc_rex).to_json()
     discovered = native_definite(json.loads(discovered_json))

     found_fields = discovered['fields']
     expected_fields = ref['fields']

     # Both sides must describe exactly the same set of fields.
     self.assertEqual(set(found_fields.keys()), set(expected_fields.keys()))

     # pandas too broken to get int/real right for now, so either of
     # these type names is accepted where the reference has one of them.
     numeric_kinds = ('int', 'real')

     for name, expected_field in expected_fields.items():
         found_field = found_fields[name]

         # The field name is included in both tuples so that a failure
         # message identifies which field has mismatched constraints.
         self.assertEqual((name, set(found_field.keys())),
                          (name, set(expected_field.keys())))

         for kind, expected in expected_field.items():
             actual = found_field[kind]
             expected_type = type(expected)

             if expected_type is float:
                 self.assertAlmostEqual(actual, expected, 4)
                 continue
             if expected_type is list:
                 self.assertEqual(set(actual), set(expected))
                 continue
             if expected in numeric_kinds:
                 self.assertTrue(actual in ('int', 'real'))
                 continue
             self.assertEqual(actual, expected)
Beispiel #2
0
def discover_df_from_file(df_path, constraints_path, verbose=True, **kwargs):
    """
    Load a DataFrame from df_path, discover constraints for it, and emit
    them as JSON.

    If constraints_path is truthy the JSON is written to that file;
    otherwise it is printed, but only when verbose is true.  Extra
    keyword arguments are passed through to discover_df.

    Returns the JSON text in all cases.
    """
    output = discover_df(load_df(df_path), **kwargs).to_json()

    if not constraints_path:
        # No output file requested: optionally echo to stdout.
        if verbose:
            print(output)
        return output

    with open(constraints_path, 'w') as out_file:
        out_file.write(output)
    return output
Beispiel #3
0
def example_constraint_generation(path=OUTPATH):
    """
    Discover constraints for a tiny example DataFrame and write them, as
    JSON, to path.

    Any existing file at path is removed first.  The function does not
    return: it prints a status message and terminates the process via
    sys.exit, with status 0 if the file exists after writing and status 1
    (message on stderr) otherwise.
    """
    # np.nan rather than np.NaN: the capitalised alias was removed in
    # NumPy 2.0, whereas np.nan exists in every NumPy release.
    df = pd.DataFrame({'a': [1, 2, 9], 'b': ['one', 'two', np.nan]})
    constraints = discover_df(df)

    # Remove any stale output so the existence check below reflects
    # this run's write.
    if os.path.exists(path):
        os.unlink(path)

    with open(path, 'w') as f:
        f.write(constraints.to_json())

    if os.path.exists(path):
        print('Written %s successfully.' % path)
        sys.exit(0)
    else:
        print('Failed to write %s.' % path, file=sys.stderr)
        sys.exit(1)
Beispiel #4
0
def example_constraint_generation(path=OUTPATH):
    """
    Discover constraints for a tiny example DataFrame and write them, as
    JSON, to path.

    Any existing file at path is removed first.  The function does not
    return: it prints a status message and terminates the process via
    sys.exit, with status 0 if the file exists after writing and status 1
    (message on stderr) otherwise.
    """
    # float('nan') rather than pd.np.NaN: the pd.np alias was deprecated
    # in pandas 1.0 and removed in pandas 2.0.  float('nan') is the same
    # float NaN value and needs no extra import.
    df = pd.DataFrame({'a': [1, 2, 9], 'b': ['one', 'two', float('nan')]})
    constraints = discover_df(df)

    # Remove any stale output so the existence check below reflects
    # this run's write.
    if os.path.exists(path):
        os.unlink(path)

    with open(path, 'w') as f:
        f.write(constraints.to_json())

    if os.path.exists(path):
        print('Written %s successfully.' % path)
        sys.exit(0)
    else:
        print('Failed to write %s.' % path, file=sys.stderr)
        sys.exit(1)
Beispiel #5
0
def discover_df_from_file(df_path, constraints_path, verbose=True, **kwargs):
    """
    Load a DataFrame, discover constraints for it, and emit them as JSON.

    df_path of '-' means the data is read from standard input; in that
    case discover_df is given df_path=None, otherwise it is given the
    original path.  Extra keyword arguments are passed to discover_df.

    Output handling:
      - constraints_path truthy and not '-': JSON is written to that file;
      - constraints_path == '-': JSON is printed;
      - otherwise: JSON is printed only when verbose is true.

    Returns the JSON text, or None if discover_df returned None.
    """
    if df_path == '-':
        # Buffer all of stdin so it can be handed to load_df.
        source = StringIO(sys.stdin.read())
        md_df_path = None
    else:
        source = df_path
        md_df_path = df_path

    constraints = discover_df(load_df(source), df_path=md_df_path, **kwargs)
    if constraints is None:
        # should never happen
        return None

    output = constraints.to_json(tddafile=constraints_path)

    to_stdout = constraints_path == '-'
    if constraints_path and not to_stdout:
        with open(constraints_path, 'w') as out_file:
            out_file.write(output)
    elif verbose or to_stdout:
        print(output)
    return output
Beispiel #6
0
def discover_df_from_file(df_path, constraints_path, verbose=True, **kwargs):
    """
    Read a DataFrame from df_path, run constraint discovery on it, and
    output the resulting constraints as JSON.

    When df_path is '-', the input is taken from standard input and
    discover_df receives df_path=None; otherwise discover_df receives
    the path that was passed in.  Remaining keyword arguments are
    forwarded to discover_df unchanged.

    The JSON is written to constraints_path when that is a truthy value
    other than '-'.  It is printed when constraints_path is '-', or when
    no file was written and verbose is true.

    Returns the JSON text, or None if discover_df returned None.
    """
    from_stdin = df_path == '-'
    # The path recorded with the constraints is dropped for stdin input.
    md_df_path = None if from_stdin else df_path
    data_source = StringIO(sys.stdin.read()) if from_stdin else df_path

    df = load_df(data_source)
    constraints = discover_df(df, df_path=md_df_path, **kwargs)
    if constraints is None:
        # should never happen
        return None

    output = constraints.to_json(tddafile=constraints_path)

    write_to_file = bool(constraints_path) and constraints_path != '-'
    if write_to_file:
        with open(constraints_path, 'w') as out_file:
            out_file.write(output)
    elif verbose or constraints_path == '-':
        print(output)
    return output
Beispiel #7
0
import os
import pandas as pd
import sys

from tdda.constraints.pd.constraints import discover_df

# Input CSV and the file that receives the discovered constraints;
# both paths are relative to the current working directory.
inpath = '../../data/processed/wrangled_dataframe.csv'
outpath = '../../data/interim/constraints_initial_csvs/wrangled_dataframe.tdda'

# Discover constraints for the whole CSV.
df = pd.read_csv(inpath, low_memory=False)
constraints = discover_df(df)

# Serialise the constraints as JSON into the output file.
with open(outpath, 'w') as f:
    f.write(constraints.to_json())

# Report the outcome and exit: status 1 (message on stderr) if the output
# file is missing, status 0 otherwise.
if not os.path.exists(outpath):
    print('Failed to write %s.' % outpath, file=sys.stderr)
    sys.exit(1)
print('Written %s successfully.' % outpath)
sys.exit(0)
Beispiel #8
0
def suggest_attribute_format(column_dict):
    """
    Build a DataFrame from column_dict, discover constraints for it with
    inc_rex=False, and return those constraints as a dictionary.
    """
    frame = pd.DataFrame(column_dict)
    return discover_df(frame, inc_rex=False).to_dict()