def is_incremented(compCat, bigfilt, smallfilt, df, offset=0):
    """
    Enforce the equality
        compCat(big rows) == compCat(small rows) + offset
    and return a marked-up copy of df.

    Counterpart of is_ordered, using equality instead of strict inequality.

    bigfilt and smallfilt are normalised with common.force_filter (the
    original notes say this lets callers pass strings instead of callables),
    and compCat is resolved with common.comparison_category.

    The equation also implies that the big and small elements are different,
    so rows matching both filters are rejected via is_diff first.

    Rows are never removed: impossible rows get their common.STATUS column
    set to common.REJECTED.
    """
    column = common.comparison_category(compCat, df)
    pick_big = common.force_filter(bigfilt)
    pick_small = common.force_filter(smallfilt)

    # Handle the implied "big != small" clause before comparing values.
    result = df.copy()
    result = is_diff(pick_big, pick_small, result)

    # Distinct values currently present on each side.
    # NOTE(review): rows are selected by the filters alone; whether already
    # rejected rows are excluded depends on the filters - confirm.
    small_values = result[pick_small(result)][column].unique()
    big_values = result[pick_big(result)][column].unique()

    # A small value is impossible when no big value sits exactly `offset`
    # above it; a big value is impossible when no small value sits exactly
    # `offset` below it.
    orphan_small = set(small_values) - set(big_values - offset)
    orphan_big = set(big_values) - set(small_values + offset)

    # Mark the impossible rows on each side.
    small_rows = pick_small(result) & (result[column].isin(orphan_small))
    result.loc[small_rows, common.STATUS] = common.REJECTED
    big_rows = pick_big(result) & (result[column].isin(orphan_big))
    result.loc[big_rows, common.STATUS] = common.REJECTED
    return result
def is_ordered(compCat, bigfilt, smallfilt, df, offset=0):
    """
    Enforce the strict inequality
        compCat(big rows) > compCat(small rows) + offset
    and return a marked-up copy of df.

    The inequality also implies that the big and small elements are
    different, so rows matching both filters are rejected via is_diff first.

    Rows are never removed: impossible rows get their common.STATUS column
    set to common.REJECTED.
    """
    column = common.comparison_category(compCat, df)
    pick_big = common.force_filter(bigfilt)
    pick_small = common.force_filter(smallfilt)

    # Handle the implied "big != small" clause before comparing values.
    result = df.copy()
    result = is_diff(pick_big, pick_small, result)

    # Every big value has to exceed the minimum small value (plus offset).
    lowest_small = result[pick_small(result)][column].min()
    big_too_low = pick_big(result) & (result[column] <= (lowest_small + offset))
    result.loc[big_too_low, common.STATUS] = common.REJECTED

    # Every small value has to stay below the maximum big value (minus offset).
    highest_big = result[pick_big(result)][column].max()
    small_too_high = pick_small(result) & (result[column] >= (highest_big - offset))
    result.loc[small_too_high, common.STATUS] = common.REJECTED
    return result
def is_same(filt1, filt2, df):
    """
    Require filt1 and filt2 to hold together.

    Works on a copy of df: every row matched by exactly one of the two
    filters gets its common.STATUS column set to common.REJECTED. Rows
    matched by both or by neither are left untouched.
    """
    first = common.force_filter(filt1)
    second = common.force_filter(filt2)
    result = df.copy()

    # Rows that satisfy the first filter but not the second.
    only_first = first(result) & (~second(result))
    result.loc[only_first, common.STATUS] = common.REJECTED

    # Rows that satisfy the second filter but not the first; the filters are
    # evaluated again here, after the first batch has been marked.
    only_second = (~first(result)) & second(result)
    result.loc[only_second, common.STATUS] = common.REJECTED
    return result
def is_either_or(isfilt, eitherfilt, orfilt, df):
    """
    Require that rows matching isfilt also match eitherfilt or orfilt.

    Two constraints are applied, and a marked-up copy of df is returned:

    * eitherfilt and orfilt exclude each other, so rows matching both are
      rejected (delegated to is_diff);
    * rows matching isfilt but neither eitherfilt nor orfilt are rejected.

    Rows are never removed: rejected rows get their common.STATUS column set
    to common.REJECTED. All three filters are normalised with
    common.force_filter.
    """
    isfilt = common.force_filter(isfilt)
    eitherfilt = common.force_filter(eitherfilt)
    orfilt = common.force_filter(orfilt)

    # The "either" and "or" alternatives exclude each other. is_diff copies
    # its input before marking rows, so df itself is left untouched and no
    # extra copy is needed here.
    df2 = is_diff(eitherfilt, orfilt, df)

    # Reject every row that matches isfilt but neither alternative.
    matches_is = isfilt(df2)
    matches_alternative = (eitherfilt(df2)) | (orfilt(df2))
    df2.loc[matches_is & ~matches_alternative, common.STATUS] = common.REJECTED
    return df2
def is_diff(filt1, filt2, df):
    """
    Require filt1 and filt2 to be mutually exclusive.

    Works on a copy of df: every row matched by both filters gets its
    common.STATUS column set to common.REJECTED. The copy is returned.
    """
    first = common.force_filter(filt1)
    second = common.force_filter(filt2)
    result = df.copy()

    # Rows where both filters hold at once violate the exclusion.
    matched_both = first(result) & second(result)
    result.loc[matched_both, common.STATUS] = common.REJECTED
    return result