def test_concise_reference_testing(self): """Should be able to use a two-item RepeatingContainer to easily compare results by unpacking the RepeatingContainer directly in to the validate() function call. """ compare = datatest.RepeatingContainer(['foo', 'FOO']) datatest.validate(*compare.lower())
def test_population_format(detail):
    data = detail({'population'})

    def integer_format(x):  # <- Helper function.
        return str(x).isdecimal()

    validate(data, integer_format)
def test_named_entities_feature(clean_df):
    '''
    Tests that the proper columns are created when using
    create_named_entities_feature(). Specifically, there should be
    12 columns, all containing float values corresponding to the
    count of the respective entity types in each message.
    '''
    test_df = create_named_entities_feature(clean_df)
    required_columns = {'PERSON', 'NORP', 'FAC', 'ORG', 'GPE', 'LOC',
                        'PRODUCT', 'EVENT', 'LANGUAGE', 'DATE', 'TIME',
                        'MONEY'}
    with accepted(Extra):
        validate(test_df.columns, required_columns)
    for column in list(required_columns):
        validate(test_df[column], float)
def test_using_regex(): """Check that values match the given pattern.""" data = ['bake', 'cake', 'bake'] regex = re.compile('[bc]ake') validate(data, regex)
def test_using_list(): """Check that the order of values match the required sequence.""" data = ['A', 'B', 'C', 'D', 'E', 'F', 'G'] requirement = ['A', 'B', 'C', 'D', 'E', 'F', 'G'] validate(data, requirement)
def test_using_set(): """Check for set membership.""" data = ['A', 'B', 'A'] requirement = {'A', 'B'} validate(data, requirement)
def test_using_tuple(): """Check that tuples of values satisfy corresponding tuple of requirements. """ data = [('A', 0.0), ('A', 1.0), ('A', 2)] requirement = ('A', float) datatest.validate(data, requirement)
def test_a(df): """ Check that a column has especific values """ data = df['A'].values requirement = {'x', 'y', 'z'} dt.validate(data, requirement)
def test_concise_reference_testing(self): """Should be able to use a two-item ProxyGroup to easily compare results by unpacking the ProxyGroup directly in to the validate() function call. """ compare = datatest.ProxyGroup(['foo', 'FOO']) datatest.validate(*compare.lower())
def get_indices_anbima(dt, wait=True): """ dt: str '%d/%m/%Y' ou dt obj """ if wait: if isinstance(wait,bool): wait = random.randint(1,3) sleep(wait) headers = {"User-Agent": np.random.choice(uas)} params["Dt_Ref"] = params["DataIni"] = params["DataFim"] = dt.strftime("%d/%m/%Y") r = requests.get(url, params=params, stream=True, headers=headers) r.raise_for_status() try: df = pd.read_csv(io.StringIO(r.text), sep=";",decimal=",",thousands=".",na_values="--", skiprows=1,parse_dates=["Data de Referência"],dayfirst=True,) assert df.shape[0] > 0, "0 linhas. " except pd.errors.EmptyDataError as e: print(dt, e, r.text, sep='\n') df = pd.DataFrame(columns=nomes_validos) # trata col_names e dtypes to_remove = ["<BR>","1.000","R$ mil"," de "," no ","d.u.","%","(",")","*",".",] df = df.set_axis(clean_names(df.columns, to_remove), axis=1).astype(valid_dtypes) # validacao datatest.validate(df.columns, nomes_validos) datatest.validate(df.dtypes, valid_dtypes) return df
def test_columns(df):
    dt.validate(
        df.columns,
        {'date', 'place', 'fips', 'cases', 'deaths', 'state',
         'change_in_cases', 'change_in_deaths'},
    )
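# A minimal sketch of the column checks used throughout these tests
# (assumes pandas and datatest are installed): a set requirement checks
# membership in both directions, so a surplus column is reported as
# Extra and an absent one as Missing.
import pandas as pd
import datatest as dt

df = pd.DataFrame(columns=['date', 'place', 'fips'])
dt.validate(df.columns, {'date', 'place', 'fips'})  # passes

try:
    dt.validate(df.columns, {'date', 'place', 'cases'})
except dt.ValidationError as err:
    print(err.differences)  # e.g. [Extra('fips'), Missing('cases')]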
def test_traders_columns(traders_df):
    dt.validate(
        traders_df.columns,
        {'countryCode', 'firstName', 'lastName', 'traderId',
         'stockSymbol', 'stockName', 'tradeId', 'price', 'volume',
         'tradeDate'},
    )
def test_get_all_stocks_columns(all_stocks_df):
    dt.validate(
        all_stocks_df.columns,
        {'cotacao', 'p_l', 'p_vp', 'psr', 'divyield', 'p_ativo',
         'p_capgiro', 'p_ebit', 'p_ativ_circliq', 'ev_ebit',
         'ev_ebitda', 'mrg_ebit', 'mrg_liq', 'liq_corr', 'roic',
         'roe', 'liq2meses', 'patrim_liq', 'divbrut_patrim',
         'cresc_rec5a'},
    )
def test_using_function(): """Check that function returns True.""" data = [2, 4, 6, 8] def iseven(x): return x % 2 == 0 validate(data, iseven)
def test_should_apply_calculations():
    df = load_model_results()
    df = apply_calculations(df)
    print(df)
    required_columns = {
        'Contract', 'Price', 'Position', 'New Trade Action',
        'Pnl Daily', 'model', 'commodity', 'pnlYtd', 'pnlLtd',
        'mddYtd'
    }
    dt.validate(df.columns, required_columns)
def validate_plumbing_engine(step_plumb, solve_plumb, steady_by, converged,
                             solve_state, step_state, solve_len, time_res):
    with dt.accepted.tolerance(SOLVE_TOL):
        dt.validate(solve_state, step_state, converged)
    assert step_plumb.current_pressures() == step_state
    assert solve_plumb.current_pressures() == solve_state
    assert solve_len < 2 * steady_by / time_res
def check_data_old(resp_data, expect_data, validators):
    for validator in validators:
        print(resp_data, expect_data, validator)
        if validator == '==':
            validate(resp_data, expect_data)
        elif validator == 'in':
            validate_in(resp_data, expect_data)
        elif validator.split('_')[0] == 'len':
            assert len(resp_data) == int(validator.split('_')[1])
def test_population_sums(detail, summary):
    data = detail({'state/territory': 'population'}).sum()
    requirement = summary({'state/territory': 'population'}).sum()

    omitted_territory = accepted({
        'Jervis Bay Territory': Deviation(-388, 388),
    })

    with accepted.percent(0.03) | omitted_territory:
        validate(data, requirement)
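# A minimal sketch of composing acceptances with | (assumes
# datatest >= 0.11): a difference is accepted if it satisfies either
# side of the union, mirroring the percent-or-omitted logic above.
from datatest import validate, accepted

with accepted.tolerance(5) | accepted.percent(0.10):
    # 'A' is off by +3 (within tolerance); 'B' is off by -9 (within 10%).
    validate({'A': 103, 'B': 91}, {'A': 100, 'B': 100})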
def test_state_labels(detail, summary):
    data = detail({'state/territory'})
    requirement = summary({'state/territory'})

    omitted_territory = accepted([
        Missing('Jervis Bay Territory'),
    ])

    with omitted_territory:
        validate(data, requirement)
def test_get_stock_columns(stock_df, stock_symbol):
    dt.validate(
        stock_df.columns,
        {'tipo', 'empresa', 'setor', 'subsetor', 'cotacao',
         'data_ult_cot', 'min_52_sem', 'max_52_sem', 'vol_med_2m',
         'valor_de_mercado', 'valor_da_firma',
         'ult_balanco_processado', 'nro_acoes', 'p_l', 'p_vp',
         'p_ebit', 'psr', 'p_ativos', 'p_cap_giro',
         'p_ativ_circ_liq', 'div_yield', 'ev_ebitda', 'ev_ebit',
         'cres_rec_5a'},
    )
def test_translation_feature(clean_df):
    '''
    Makes sure that a translation feature is created as expected and
    that it only contains values of 0 and 1.
    '''
    test_df = create_translation_feature(clean_df)
    with accepted(Extra):
        validate(test_df.columns, {'translated'})
    validate(test_df['translated'], {0, 1})
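# A minimal sketch of accepted(Extra) as used above (assumes the modern
# datatest API): accepting the Extra class tolerates surplus elements
# in the data under test while still failing on missing ones.
from datatest import validate, accepted, Extra

with accepted(Extra):
    validate({'translated', 'surplus_column'}, {'translated'})  # passes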
def test_load_model_results():
    df = load_model_results()
    required_columns = {
        'Contract', 'Price', 'Position', 'New Trade Action',
        'Pnl Daily', 'model', 'commodity'
    }
    dt.validate(df.columns, required_columns)
    assert 'Model1' in df['model'].tolist()
    assert 'Model2' in df['model'].tolist()
    assert 'Commodity1' in df['commodity'].tolist()
    assert 'Commodity2' in df['commodity'].tolist()
def check_data(resp_body, checkpoints):
    for checkpoint in checkpoints.keys():
        resp_data = jsonpath(resp_body, f'$..{checkpoint}')
        expect_data = checkpoints[checkpoint]
        logger.log_info(f'\nresp_data: {resp_data}\n'
                        f'expect_data: {expect_data}')
        try:
            validate(resp_data, expect_data)
        except ValidationError:
            logger.log_error(f'\nresponse_data: {resp_data}\n'
                             f'expect_data: {expect_data}')
            raise
def test_using_dict(): """Check that values satisfy requirements of matching keys.""" data = { 'A': 100, 'B': 200, 'C': 300, } requirement = { 'A': 100, 'B': 200, 'C': 300, } validate(data, requirement)
def test_compare_rows(self): """Should be able to compare rows by calling a selector by its own fieldnames. """ a = self.selector_a b = self.selector_b # A case we want to optimize. datatest.validate(a(a.fieldnames), a(a.fieldnames)) # A case we want to optimize (using ordered intersection of fieldnames). common_fields = tuple(x for x in a.fieldnames if x in b.fieldnames) datatest.validate(a(common_fields), b(common_fields))
def test_feature_columns(features):
    '''
    Expect that there will be 14 columns in total with a very
    specific set of names.
    '''
    required_columns = {
        'message', 'entity_PERSON', 'entity_NORP', 'entity_FAC',
        'entity_ORG', 'entity_GPE', 'entity_LOC', 'entity_PRODUCT',
        'entity_EVENT', 'entity_LANGUAGE', 'entity_DATE',
        'entity_TIME', 'entity_MONEY', 'translated'
    }
    validate(features.columns, required_columns)
def test_max_value(): """Validates values within a list""" data = [60, 200, 18, 99, 105] def max200(x): if x <= 200: return True return dt.Deviation(x - 200, 200) def test_sum(): assert sum(data) == 482 # ... add more functions here dt.validate(data, max200)
def test_compare_fieldnames(self):
    """Should be able to compare ``fieldnames`` between Selectors by
    simply casting the *requirement* as a set and comparing it
    directly against the ``fieldnames`` attribute of the other
    Selector.
    """
    a = self.selector_a
    b = self.selector_b

    # A case we want to optimize.
    datatest.validate(a.fieldnames, set(a.fieldnames))

    # A case we want to optimize.
    with datatest.allowed.specific(datatest.Extra('C')):
        datatest.validate(a.fieldnames, set(b.fieldnames))
def test_merged_columns(df):
    '''
    Checks that DataFrame loading and merging were successful.
    Specifically, checks that exactly five columns are present
    (and no more):

        id, message, original, genre, categories
    '''
    required_names = {'id', 'message', 'original', 'genre', 'categories'}
    validate(df.columns, required_names)
def test_label_columns(labels):
    '''
    Expect that there will be 36 columns in total with a very
    specific set of names.
    '''
    required_columns = {
        'related', 'request', 'offer', 'aid_related', 'medical_help',
        'medical_products', 'search_and_rescue', 'security',
        'military', 'child_alone', 'water', 'food', 'shelter',
        'clothing', 'money', 'missing_people', 'refugees', 'death',
        'other_aid', 'infrastructure_related', 'transport',
        'buildings', 'electricity', 'tools', 'hospitals', 'shops',
        'aid_centers', 'other_infrastructure', 'weather_related',
        'floods', 'storm', 'fire', 'earthquake', 'cold',
        'other_weather', 'direct_report'
    }
    validate(labels.columns, required_columns)