Beispiel #1
0
 def test_check_fk_constraint_valid_1(self):
     """Both foreign keys of the candidate set satisfy the FK check.

     Loads the three CSV fixtures and verifies that
     ``ch.check_fk_constraint`` returns ``True`` for ``ltable_ID`` against
     the ``ID`` column of the first table and for ``rtable_ID`` against the
     ``ID`` column of the second table.
     """
     left_table = pd.read_csv(path_a)
     right_table = pd.read_csv(path_b)
     cand_table = pd.read_csv(path_c)
     # Left foreign key first, then right, one assertion per foreign key.
     for fk_attr, base_table in (('ltable_ID', left_table),
                                 ('rtable_ID', right_table)):
         outcome = ch.check_fk_constraint(cand_table, fk_attr, base_table,
                                          'ID')
         self.assertEqual(outcome, True)
Beispiel #2
0
def _add_output_attributes(candset,
                           fk_ltable,
                           fk_rtable,
                           ltable=None,
                           rtable=None,
                           l_key=None,
                           r_key=None,
                           l_output_attrs=None,
                           r_output_attrs=None,
                           l_output_prefix='ltable_',
                           r_output_prefix='rtable_',
                           validate=True):
    """Append prefixed attribute columns from the base tables to ``candset``.

    Args:
        candset: pandas DataFrame to which the columns are appended.
        fk_ltable: Name (string) of the candset column referring to ltable.
        fk_rtable: Name (string) of the candset column referring to rtable.
        ltable: Left table; required when ``l_output_attrs`` is given.
        rtable: Right table; required when ``r_output_attrs`` is given.
        l_key: Key attribute of ltable; required with ``l_output_attrs``.
        r_key: Key attribute of rtable; required with ``r_output_attrs``.
        l_output_attrs: Left-table attributes to pull, or None to skip.
        r_output_attrs: Right-table attributes to pull, or None to skip.
        l_output_prefix: Prefix prepended to each left output column name.
        r_output_prefix: Prefix prepended to each right output column name.
        validate: When true, ``check_fk_constraint`` is called for every
            side whose output attributes are requested.

    Returns:
        ``candset`` itself when no output attributes are requested;
        otherwise the result of concatenating the projected frame(s) to it
        column-wise (left first, then right).

    Raises:
        AssertionError: If ``candset`` is not a DataFrame, a foreign-key
            name is not a string, or a required table/key is None.
    """

    def _reject(message):
        # Every validation failure is logged and then raised with the same
        # text.
        logger.error(message)
        raise AssertionError(message)

    if not isinstance(candset, pd.DataFrame):
        _reject('Input object is not of type pandas data frame')

    if not isinstance(fk_ltable, six.string_types):
        _reject('fk_ltable is not of type string')

    if not isinstance(fk_rtable, six.string_types):
        _reject('fk_rtable is not of type string')

    # Projections are collected first and concatenated only afterwards, so
    # both sides are computed against the unmodified candset.
    projections = []

    if l_output_attrs is not None:
        if ltable is None:
            _reject('ltable is not given to pull l_output_attrs')
        if l_key is None:
            _reject('ltable key cannot be None')
        if validate:
            # NOTE(review): the value returned by check_fk_constraint is
            # not inspected here -- confirm whether that is intended.
            check_fk_constraint(candset, fk_ltable, ltable, l_key)
        left_names = [l_output_prefix + attr for attr in l_output_attrs]
        projections.append(
            create_proj_dataframe(ltable, l_key, candset[fk_ltable],
                                  l_output_attrs, left_names))

    if r_output_attrs is not None:
        if rtable is None:
            _reject('rtable is not given to pull r_output_attrs')
        if r_key is None:
            _reject('rtable key cannot be None')
        if validate:
            # NOTE(review): return value discarded here as well.
            check_fk_constraint(candset, fk_rtable, rtable, r_key)
        right_names = [r_output_prefix + attr for attr in r_output_attrs]
        projections.append(
            create_proj_dataframe(rtable, r_key, candset[fk_rtable],
                                  r_output_attrs, right_names))

    # One concat per projected frame, in left-then-right order.
    for projected in projections:
        candset = pd.concat([candset, projected], axis=1)
    return candset
Beispiel #3
0
def validate_metadata_for_candset(candset, key, fk_ltable, fk_rtable, ltable,
                                  rtable, l_key, r_key, lgr, verbose):
    """Validate a candidate set's metadata against its two base tables.

    Args:
        candset: Candidate set; must be a pandas DataFrame.
        key: Name of the key column; must be a column of ``candset``.
        fk_ltable: Name of the candset column referring to ``ltable``.
        fk_rtable: Name of the candset column referring to ``rtable``.
        ltable: Left base table; must be a pandas DataFrame.
        rtable: Right base table; must be a pandas DataFrame.
        l_key: Key attribute that must be present in ``ltable``.
        r_key: Key attribute that must be present in ``rtable``.
        lgr: Logger object forwarded to ``ch.log_info`` and
            ``validate_metadata_for_table``.
        verbose: Verbosity flag forwarded together with ``lgr``.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: If ``candset``, ``ltable`` or ``rtable`` is not a
            DataFrame, or a foreign key constraint check fails.
        KeyError: If ``key``, ``fk_ltable`` or ``fk_rtable`` is not a column
            of ``candset``, or ``l_key`` / ``r_key`` is missing from its
            table.
    """
    if not isinstance(candset, pd.DataFrame):
        logger.error('Input cand.set is not of type pandas data frame')
        raise AssertionError('Input cand.set is not of type pandas data frame')

    if key not in candset.columns:
        logger.error('Input key ( %s ) not in the dataframe' % key)
        raise KeyError('Input key ( %s ) not in the dataframe' % key)

    if fk_ltable not in candset.columns:
        logger.error('Input fk_ltable ( %s ) not in the dataframe' % fk_ltable)
        raise KeyError('Input fk_ltable ( %s ) not in the dataframe' %
                       fk_ltable)

    if fk_rtable not in candset.columns:
        logger.error('Input fk_rtable ( %s ) not in the dataframe' % fk_rtable)
        raise KeyError('Input fk_rtable ( %s ) not in the dataframe' %
                       fk_rtable)

    if not isinstance(ltable, pd.DataFrame):
        logger.error('Input ltable is not of type pandas data frame')
        raise AssertionError('Input ltable is not of type pandas data frame')

    if not isinstance(rtable, pd.DataFrame):
        logger.error('Input rtable is not of type pandas data frame')
        raise AssertionError('Input rtable is not of type pandas data frame')

    if l_key not in ltable:
        logger.error('ltable key ( %s ) not in ltable' % l_key)
        raise KeyError('ltable key ( %s ) not in ltable' % l_key)

    if r_key not in rtable:
        logger.error('rtable key ( %s ) not in rtable' % r_key)
        raise KeyError('rtable key ( %s ) not in rtable' % r_key)

    validate_metadata_for_table(candset, key, 'cand.set', lgr, verbose)

    # The foreign key checks raise explicitly instead of using ``assert``:
    # assert statements are removed when Python runs with -O, which would
    # silently skip this validation. The exception type stays AssertionError
    # so existing callers keep working.
    ch.log_info(lgr, 'Validating foreign key constraint for left table',
                verbose)
    if not ch.check_fk_constraint(candset, fk_ltable, ltable, l_key):
        raise AssertionError('Cand.set does not satisfy foreign key '
                             'constraint with the left table')
    ch.log_info(lgr, '..... Done', verbose)

    ch.log_info(lgr, 'Validating foreign key constraint for right table',
                verbose)
    if not ch.check_fk_constraint(candset, fk_rtable, rtable, r_key):
        raise AssertionError('Cand.set does not satisfy foreign key '
                             'constraint with the right table')
    ch.log_info(lgr, '..... Done', verbose)

    return True
Beispiel #4
0
def validate_metadata_for_candset(candset, key, fk_ltable, fk_rtable, ltable, rtable,
                                  l_key, r_key,
                                  lgr, verbose):
    """Check that a candidate set and its base tables carry valid metadata.

    Args:
        candset: Candidate set; must be a pandas DataFrame.
        key: Name of the key column; must be a column of ``candset``.
        fk_ltable: Name of the candset column referring to ``ltable``.
        fk_rtable: Name of the candset column referring to ``rtable``.
        ltable: Left base table; must be a pandas DataFrame.
        rtable: Right base table; must be a pandas DataFrame.
        l_key: Key attribute that must be present in ``ltable``.
        r_key: Key attribute that must be present in ``rtable``.
        lgr: Logger object forwarded to ``ch.log_info`` and
            ``validate_metadata_for_table``.
        verbose: Verbosity flag forwarded together with ``lgr``.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: If ``candset``, ``ltable`` or ``rtable`` is not a
            DataFrame, or a foreign key constraint check fails.
        KeyError: If ``key``, ``fk_ltable`` or ``fk_rtable`` is not a column
            of ``candset``, or ``l_key`` / ``r_key`` is missing from its
            table.
    """
    if not isinstance(candset, pd.DataFrame):
        logger.error('Input cand.set is not of type pandas data frame')
        raise AssertionError('Input cand.set is not of type pandas data frame')

    if key not in candset.columns:
        logger.error('Input key ( %s ) not in the dataframe' % key)
        raise KeyError('Input key ( %s ) not in the dataframe' % key)

    if fk_ltable not in candset.columns:
        logger.error('Input fk_ltable ( %s ) not in the dataframe' % fk_ltable)
        raise KeyError('Input fk_ltable ( %s ) not in the dataframe' % fk_ltable)

    if fk_rtable not in candset.columns:
        logger.error('Input fk_rtable ( %s ) not in the dataframe' % fk_rtable)
        raise KeyError('Input fk_rtable ( %s ) not in the dataframe' % fk_rtable)

    if not isinstance(ltable, pd.DataFrame):
        logger.error('Input ltable is not of type pandas data frame')
        raise AssertionError('Input ltable is not of type pandas data frame')

    if not isinstance(rtable, pd.DataFrame):
        logger.error('Input rtable is not of type pandas data frame')
        raise AssertionError('Input rtable is not of type pandas data frame')

    if l_key not in ltable:
        logger.error('ltable key ( %s ) not in ltable' % l_key)
        raise KeyError('ltable key ( %s ) not in ltable' % l_key)

    if r_key not in rtable:
        logger.error('rtable key ( %s ) not in rtable' % r_key)
        raise KeyError('rtable key ( %s ) not in rtable' % r_key)

    validate_metadata_for_table(candset, key, 'cand.set', lgr, verbose)

    # Explicit raises replace the former ``assert`` statements, which are
    # stripped under ``python -O`` and would let an invalid candidate set
    # through. AssertionError is kept as the exception type for callers.
    ch.log_info(lgr, 'Validating foreign key constraint for left table', verbose)
    if not ch.check_fk_constraint(candset, fk_ltable, ltable, l_key):
        raise AssertionError('Cand.set does not satisfy foreign key '
                             'constraint with the left table')
    ch.log_info(lgr, '..... Done', verbose)

    ch.log_info(lgr, 'Validating foreign key constraint for right table', verbose)
    if not ch.check_fk_constraint(candset, fk_rtable, rtable, r_key):
        raise AssertionError('Cand.set does not satisfy foreign key '
                             'constraint with the right table')
    ch.log_info(lgr, '..... Done', verbose)

    return True
Beispiel #5
0
def _add_output_attributes(candset, fk_ltable, fk_rtable, ltable=None, rtable=None,
                           l_key=None, r_key=None,
                           l_output_attrs=None, r_output_attrs=None,
                           l_output_prefix='ltable_', r_output_prefix='rtable_',
                           validate=True):
    """Attach prefixed base-table attributes to the candidate set.

    For each side (left, right) whose ``*_output_attrs`` is not None, the
    matching table and key must be supplied; the requested attributes are
    projected through ``create_proj_dataframe`` using the candset's foreign
    key column and renamed with the side's prefix. The projected frames are
    then concatenated column-wise to ``candset`` (left first, then right).

    Args:
        candset: pandas DataFrame to extend.
        fk_ltable: Name (string) of the candset column referring to ltable.
        fk_rtable: Name (string) of the candset column referring to rtable.
        ltable: Left table, needed only when ``l_output_attrs`` is given.
        rtable: Right table, needed only when ``r_output_attrs`` is given.
        l_key: Key attribute of ltable.
        r_key: Key attribute of rtable.
        l_output_attrs: Left attributes to add, or None.
        r_output_attrs: Right attributes to add, or None.
        l_output_prefix: Prefix for the left output column names.
        r_output_prefix: Prefix for the right output column names.
        validate: When true, ``check_fk_constraint`` is invoked per side.

    Returns:
        The original ``candset`` object when nothing is requested, else the
        concatenated DataFrame.

    Raises:
        AssertionError: On a non-DataFrame candset, a non-string foreign key
            name, or a missing table/key for a requested side.
    """

    def _bail(text):
        # Log the problem, then raise it with the identical message.
        logger.error(text)
        raise AssertionError(text)

    if not isinstance(candset, pd.DataFrame):
        _bail('Input object is not of type pandas data frame')

    if not isinstance(fk_ltable, six.string_types):
        _bail('fk_ltable is not of type string')

    if not isinstance(fk_rtable, six.string_types):
        _bail('fk_rtable is not of type string')

    # Frames to append; filled before any concat so that both projections
    # read from the candset exactly as it was passed in.
    pending = []

    if l_output_attrs is not None:
        if ltable is None:
            _bail('ltable is not given to pull l_output_attrs')
        if l_key is None:
            _bail('ltable key cannot be None')
        if validate:
            # NOTE(review): the result of check_fk_constraint is ignored
            # here -- confirm whether a failed check should stop execution.
            check_fk_constraint(candset, fk_ltable, ltable, l_key)
        l_names = [l_output_prefix+name for name in l_output_attrs]
        pending.append(create_proj_dataframe(ltable, l_key, candset[fk_ltable],
                                             l_output_attrs, l_names))

    if r_output_attrs is not None:
        if rtable is None:
            _bail('rtable is not given to pull r_output_attrs')
        if r_key is None:
            _bail('rtable key cannot be None')
        if validate:
            # NOTE(review): result ignored here as well.
            check_fk_constraint(candset, fk_rtable, rtable, r_key)
        r_names = [r_output_prefix+name for name in r_output_attrs]
        pending.append(create_proj_dataframe(rtable, r_key, candset[fk_rtable],
                                             r_output_attrs, r_names))

    for frame in pending:
        candset = pd.concat([candset, frame], axis=1)
    return candset
Beispiel #6
0
 def test_check_fk_constraint_invalid_attr_mval(self):
     """A missing value in the foreign key column makes the FK check fail.

     The first row of the candidate set's ``ltable_ID`` column is set to
     NaN, after which ``ch.check_fk_constraint`` must return ``False``.
     """
     A = pd.read_csv(path_a)
     C = pd.read_csv(path_c)
     # ``DataFrame.ix`` and the ``pd.np`` alias no longer exist in current
     # pandas releases; label-based ``.loc`` with a plain float NaN performs
     # the same assignment on the default integer index from read_csv.
     C.loc[0, 'ltable_ID'] = float('nan')
     status = ch.check_fk_constraint(C, 'ltable_ID', A, 'ID')
     self.assertEqual(status, False)
Beispiel #7
0
def validate_and_set_fk_rtable(df_foreign, fk_rtable, rtable, r_key):
    """Set the ``fk_rtable`` property of ``df_foreign`` if the FK check holds.

    Args:
        df_foreign: Object whose ``fk_rtable`` property is to be set.
        fk_rtable: Foreign key attribute referring to ``rtable``.
        rtable: The referenced table.
        r_key: Key attribute of ``rtable``.

    Returns:
        Whatever ``set_property`` returns when the constraint check yields
        True; otherwise ``False`` (after logging a warning).
    """
    # Input validation is delegated to check_fk_constraint itself.
    fk_result = ch.check_fk_constraint(df_foreign, fk_rtable, rtable, r_key)
    # Equality with True (not plain truthiness) is kept on purpose so the
    # accepted values stay exactly the same.
    constraint_holds = (fk_result == True)
    if not constraint_holds:
        logger.warning(
            'FK constraint for rtable and fk_rtable is not satisfied; Not setting the fk_rtable and rtable')
        return False
    return set_property(df_foreign, 'fk_rtable', fk_rtable)
Beispiel #8
0
def validate_and_set_fk_rtable(df_foreign, fk_rtable, rtable, r_key):
    """Record ``fk_rtable`` on ``df_foreign`` once its FK constraint is met.

    Args:
        df_foreign: Object whose ``fk_rtable`` property is to be set.
        fk_rtable: Foreign key attribute referring to ``rtable``.
        rtable: The referenced table.
        r_key: Key attribute of ``rtable``.

    Returns:
        The return value of ``set_property`` when the constraint check
        yields True; ``False`` otherwise, in which case a warning is logged
        and nothing is set.
    """
    # check_fk_constraint performs the argument validations on its own.
    check_outcome = ch.check_fk_constraint(df_foreign, fk_rtable, rtable,
                                           r_key)
    # Keep the explicit comparison with True so the set of accepted
    # outcomes is unchanged.
    satisfied = (check_outcome == True)
    if not satisfied:
        logger.warning(
            'FK constraint for rtable and fk_rtable is not satisfied; Not setting the fk_rtable and rtable'
        )
        return False
    return set_property(df_foreign, 'fk_rtable', fk_rtable)
Beispiel #9
0
 def test_check_fk_constraint_invalid_attr_notin(self):
     """The FK check returns False when the base attribute is ``'ID1'``.

     Presumably ``'ID1'`` is not a column of the left table fixture --
     confirm against the CSV behind ``path_a``.
     """
     left_table = pd.read_csv(path_a)
     # The right table is loaded but takes no part in the check below.
     right_table = pd.read_csv(path_b)
     cand_table = pd.read_csv(path_c)
     self.assertEqual(
         ch.check_fk_constraint(cand_table, 'ltable_ID', left_table, 'ID1'),
         False)
Beispiel #10
0
 def test_check_fk_constraint_invalid_foreign_attr(self):
     """Call the FK check with ``None`` as the foreign key attribute.

     NOTE(review): the expected outcome (presumably a raised exception
     declared by a decorator) is not visible in this view -- confirm.
     """
     foreign_df = pd.DataFrame()
     base_df = pd.DataFrame()
     ch.check_fk_constraint(foreign_df, None, base_df, 'ID')
Beispiel #11
0
 def test_check_fk_constraint_invalid_base_attr(self):
     """Call the FK check with ``None`` as the base table attribute.

     NOTE(review): the expected outcome (presumably a raised exception
     declared by a decorator) is not visible in this view -- confirm.
     """
     foreign_df = pd.DataFrame()
     base_df = pd.DataFrame()
     ch.check_fk_constraint(foreign_df, 'rtable_ID', base_df, None)
Beispiel #12
0
 def test_check_fk_constraint_invalid_foreign_df(self):
     """Call the FK check with ``None`` in place of the foreign table.

     NOTE(review): the expected outcome (presumably a raised exception
     declared by a decorator) is not visible in this view -- confirm.
     """
     base_df = pd.DataFrame()
     ch.check_fk_constraint(None, 'rtable_ID', base_df, 'ID')