def find_dataset(self, options=None):
    """Build and run the dataset query described by ``options``.

    Keys read from ``options``: ``where``, ``categorias`` (required),
    ``pivot``, ``agregacao``, ``limit``, ``offset``, ``theme`` and
    ``ordenacao``. When ``theme`` is absent it is stored in ``options``
    as ``'MAIN'``.

    Returns:
        Whatever ``self.fetch_data`` returns for the assembled query.

    Raises:
        ValueError: if ``QueryBuilder.catch_injection`` flags the options.
    """
    if QueryBuilder.catch_injection(options):
        raise ValueError('SQL reserved words not allowed!')

    where = options.get('where')
    where_sql = '' if where is None else ' WHERE ' + self.build_filter_string(where)

    # Pivot columns are selected and grouped together with the categories.
    fields = options['categorias']
    pivot = options.get('pivot')
    if pivot:
        fields = fields + pivot

    group_sql = ''
    if options.get('agregacao', False):
        group_sql = QueryBuilder.build_grouping_string(fields, options['agregacao'])

    select_sql = self.build_categorias(fields, options)

    # A falsy limit (None, 0, '') emits no LIMIT; any non-None offset is emitted.
    limit = options.get('limit')
    limit_sql = f'LIMIT {limit}' if limit else ''
    offset = options.get('offset')
    offset_sql = f'OFFSET {offset}' if offset is not None else ''

    options.setdefault('theme', 'MAIN')

    template = self.get_named_query('QRY_FIND_DATASET')
    table_name = self.get_table_name(options.get('theme'))
    order_sql = self.build_order_string(options.get('ordenacao'))
    query = template.format(
        select_sql, table_name, where_sql, group_sql,
        order_sql, limit_sql, offset_sql)
    return self.fetch_data(query)
def build_std_calcs(self, options):
    """Build the calculated value columns (min, max and the requested calcs).

    Returns an empty string when the instance has no ``VAL_FIELD`` attribute
    or when ``get_default_partitioning(options)`` is ``None``. Otherwise every
    entry of ``options['calcs']`` is rendered from the pattern returned by
    ``replace_partition``; any calc other than ``min_part``/``max_part`` is
    preceded by the ``min_part`` and ``max_part`` columns.

    Returns:
        The rendered columns joined by ``', '``.
    """
    value_expr = getattr(self, 'VAL_FIELD', None)
    if value_expr is None or self.get_default_partitioning(options) is None:
        return ''

    # An explicitly requested value field overrides the class default.
    if QueryBuilder.check_params(options, ['valor']):
        value_expr = options['valor']

    # Partition: explicit option first, then the non-empty default.
    partition = None
    if QueryBuilder.check_params(options, ['partition']):
        partition = options.get('partition')
    elif self.get_default_partitioning(options) != '':
        partition = self.get_default_partitioning(options)

    # With an aggregation, the value field becomes the aggregated expression.
    if QueryBuilder.check_params(options, ['agregacao']):
        value_expr = QueryBuilder.get_simple_agr_string(
            options['agregacao'][0], options['valor'][0])
        if QueryBuilder.check_params(options, ['pivot']):
            partition = self.exclude_from_partition(
                options['categorias'], options['agregacao'])
    elif isinstance(value_expr, list):
        value_expr = value_expr[0]

    partition_sql = ",".join(partition) if isinstance(partition, list) else partition

    def render(calc_name):
        # Without a partition the PARTITION BY placeholder is dropped
        # from the pattern before formatting.
        template = self.replace_partition(calc_name, options)
        if partition_sql is None:
            template = template.replace('PARTITION BY {partition}', '')
        return template.format(
            val_field=value_expr, partition=partition_sql, calc=calc_name)

    rendered = []
    for calc in options['calcs']:
        if calc not in ['min_part', 'max_part']:
            rendered.append(render('min_part'))
            rendered.append(render('max_part'))
        rendered.append(render(calc))
    return ', '.join(rendered)
def build_joined_categorias(self, categorias, valor=None, agregacao=None, joined=None):
    """Build the SELECT column list for a joined query.

    The categories are transformed with the join suffix for ``joined``.
    When ``agregacao`` is given, aggregation columns are appended: combined
    with ``valor`` when one is supplied, generic ones otherwise.

    Returns:
        The columns joined by ``', '``.

    Raises:
        ValueError: if ``categorias`` is empty/None, or if the resulting
            column list fails ``QueryBuilder.validate_field_array``.
    """
    if not categorias:
        raise ValueError('Invalid Categories - required')

    suffix = self.get_join_suffix(joined)
    columns = []
    columns.extend(QueryBuilder.transform_joined_categorias(categorias, suffix))

    if agregacao is not None:
        if valor is not None:
            columns.extend(self.combine_val_aggr(valor, agregacao, suffix))
        else:
            columns.extend(self.build_generic_agr_array(agregacao))

    if not QueryBuilder.validate_field_array(columns):
        raise ValueError('Invalid attributes')
    return ', '.join(columns)
def find_joined_dataset(self, options=None):
    """Build and run the joined-dataset query described by ``options``.

    Keys read by direct indexing (must be present): ``joined``, ``where``,
    ``agregacao``, ``categorias`` and ``valor``. ``theme`` and ``ordenacao``
    are optional; a missing ``theme`` is stored in ``options`` as ``'MAIN'``.

    Returns:
        Whatever ``self.fetch_data`` returns for the assembled query.

    Raises:
        ValueError: if ``QueryBuilder.catch_injection`` flags the options or
            ``options['joined']`` is ``None``.
    """
    if QueryBuilder.catch_injection(options):
        raise ValueError('SQL reserved words not allowed!')

    joined = options['joined']
    if joined is None:
        raise ValueError('Joined table is required')

    where = options['where']
    where_sql = ''
    if where is not None:
        where_sql = ' WHERE ' + self.build_filter_string(where, joined, False)

    group_sql = ''
    if options['agregacao'] is not None:
        group_sql = self.build_joined_grouping_string(
            options['categorias'], options['agregacao'], joined)

    options.setdefault('theme', 'MAIN')

    select_sql = self.build_joined_categorias(
        options['categorias'], options['valor'], options['agregacao'], joined)

    template = self.get_named_query('QRY_FIND_JOINED_DATASET')
    from_table = self.get_table_name(options.get('theme'))   # FROM
    join_table = self.get_table_name(joined)                 # JOIN
    join_condition = self.get_join_condition(joined, where)  # ON
    order_sql = self.build_order_string(options.get('ordenacao'))  # ORDER BY
    query = template.format(
        select_sql, from_table, join_table, join_condition,
        where_sql, group_sql, order_sql)
    return self.fetch_data(query)
def test_rank_count(self):
    """get_agr_string renders the 'rank_count' aggregation as a RANK() window."""
    self.assertEqual(
        QueryBuilder.get_agr_string('rank_count', 'vl_indicador'),
        'RANK() OVER(ORDER BY COUNT(vl_indicador) DESC) AS agr_rank_count_vl_indicador'
    )
def test_bypass(self):
    """get_simple_agr_string returns None for the 'DISTINCT' aggregation.

    Per the original note, this is because DISTINCT is on the ignore list.
    """
    result = QueryBuilder.get_simple_agr_string('DISTINCT', 'vl_indicador')
    # Identity check: assertEqual(x, None) would also pass for any object
    # whose __eq__ claims equality with None.
    self.assertIsNone(result)
def build_joined_grouping_string(self, categorias=None, agregacao=None, joined=None):
    """Build the GROUP BY clause for a joined query.

    Each category is reduced to the part before its first ``-`` and has the
    join suffix for ``joined`` removed when it ends with that suffix.

    Returns:
        ``'GROUP BY <fields>'`` when ``QueryBuilder.is_valid_grouping``
        accepts ``agregacao``, otherwise an empty string.

    Raises:
        ValueError: if ``categorias`` is None, or ``agregacao`` is None.
    """
    if categorias is None:
        raise ValueError('Invalid fields')

    group_fields = []
    for categoria in categorias:
        suffix = self.get_join_suffix(joined)
        # Text before the first dash; the whole name when there is no dash.
        base = categoria.split('-')[0]
        if base[-len(suffix):] == suffix:
            base = base[:-len(suffix)]
        group_fields.append(base)

    if agregacao is None:
        raise ValueError('Invalid aggregation (no value)')
    if not QueryBuilder.is_valid_grouping(agregacao):
        return ''
    return f'GROUP BY {", ".join(group_fields)}'
def test_rank_dense_sum(self):
    """get_agr_string renders 'rank_dense_sum' as a DENSE_RANK() window."""
    rendered = QueryBuilder.get_agr_string('rank_dense_sum', 'vl_indicador')
    self.assertEqual(
        rendered,
        'DENSE_RANK() OVER(ORDER BY SUM(vl_indicador) DESC) '
        'AS agr_rank_dense_sum_vl_indicador'
    )
def test_as_is(self):
    """get_simple_agr_string wraps the field in the 'sum' aggregation unchanged."""
    self.assertEqual(
        QueryBuilder.get_simple_agr_string('sum', 'vl_indicador'),
        'sum(vl_indicador)'
    )
def test_custom(self):
    """get_agr_string expands 'pct_count' into its percentage expression."""
    rendered = QueryBuilder.get_agr_string('pct_count', 'vl_indicador')
    self.assertEqual(
        rendered,
        'COUNT(vl_indicador) * 100 / SUM(COUNT(vl_indicador)) OVER() '
        'AS agr_pct_count_vl_indicador'
    )
def exclude_from_partition(self, categorias, agregacoes, options=None):
    """Keep only the default partition fields that are also grouped fields.

    The default partitioning string is split on ``", "`` and filtered
    against the fields of the GROUP BY clause built from ``categorias``
    and ``agregacoes``.

    Returns:
        The surviving partition fields joined by ``", "``.
    """
    partition_fields = self.get_default_partitioning(options).split(", ")
    grouping = QueryBuilder.build_grouping_string(categorias, agregacoes)
    grouped_fields = grouping.replace('GROUP BY ', '').split(", ")
    return ", ".join(
        [field for field in partition_fields if field in grouped_fields])
def build_agr_array(valor=None, agregacao=None):
    """Combine each aggregation with the value field for the SELECT list.

    Returns:
        The non-None results of ``QueryBuilder.get_agr_string`` for every
        aggregation, in order; an empty list when ``agregacao`` is empty
        or None.
    """
    if not agregacao:
        return []
    rendered = (QueryBuilder.get_agr_string(aggr, valor) for aggr in agregacao)
    return [expr for expr in rendered if expr is not None]
def build_generic_agr_array(agregacao=None):
    """Render aggregations that have no specific field, applying them to ``*``.

    Returns:
        The non-None results of ``QueryBuilder.get_agr_string(aggr, '*')``
        in order; an empty list when ``agregacao`` is empty or None.
    """
    if not agregacao:
        return []
    rendered = (QueryBuilder.get_agr_string(aggr, '*') for aggr in agregacao)
    return [expr for expr in rendered if expr is not None]
def build_filter_string(self, where=None, joined=None, is_on=False):
    ''' Builds WHERE clauses or added ON conditions.

    Each entry of ``where`` is a dash-separated clause whose first token is
    the operator (case-insensitive), second the field, third the value and
    any further tokens operator-specific arguments. ``AND``/``OR`` entries
    are copied to the output as connectives. Every other clause is emitted
    only if ``QueryBuilder.validate_clause`` accepts it; clauses with an
    unrecognised operator are silently skipped.

    Returns:
        The SQL fragments joined by single spaces, or an empty string when
        ``where`` is None or when ``is_on`` is set without ``joined``.
    '''
    # Two-letter comparison codes; the composite operators below (EQON,
    # GTSTR, ...) reuse them through the first two letters of their name.
    simple_operators = {
        'EQ': "=", "NE": "!=", "LE": "<=", "LT": "<", "GE": ">=", "GT": ">", "LK": "LIKE"
    }
    # Unary operators: only the field is used, no value.
    boolean_operators = {
        "NL": "IS NULL", "NN": "IS NOT NULL"
    }
    suffix = ''
    if joined is not None:
        suffix = self.get_join_suffix(joined)
    if where is None or (joined is None and is_on):
        return ''
    arr_result = []
    for each_clause in where:
        # Escaped dashes (backslash + dash) are swapped for '|' so the split
        # on '-' keeps them inside their token, then restored as plain '-'.
        # Side effect: a literal '|' in a clause also comes out as '-'.
        w_clause = each_clause.replace('\\-', '|')
        w_clause = w_clause.split('-')
        w_clause = [f.replace('|', '-') for f in w_clause]
        if w_clause[0].upper() == 'AND' or w_clause[0].upper() == 'OR':
            # Connectives are emitted exactly as typed (case preserved).
            arr_result.append(w_clause[0])
        elif QueryBuilder.validate_clause(w_clause, joined, is_on, suffix):
            if w_clause[0].upper() in simple_operators:
                # <field> <op> <value>
                arr_result.append(
                    f'{w_clause[1]} '
                    f'{simple_operators[w_clause[0].upper()]} '
                    f'{w_clause[2]}'
                )
            elif w_clause[0].upper() in boolean_operators:
                # <field> IS [NOT] NULL
                arr_result.append(
                    f'{w_clause[1]} '
                    f'{boolean_operators[w_clause[0].upper()]}'
                )
            elif w_clause[0].upper() == 'IN':
                # Every token after the field is an IN-list member.
                arr_result.append(f'{w_clause[1]} IN ({",".join(w_clause[2:])})')
            elif w_clause[0].upper() in ['EQON', 'NEON', 'LEON', 'GEON', 'LTON', 'GTON']:
                # Compare only the digits of the field (non-digits stripped),
                # against a quoted value; with 5 tokens, tokens 3 and 4 are
                # passed as the substring arguments.
                resulting_string = f"regexp_replace(CAST({w_clause[1]} AS STRING), '[^[:digit:]]','')"
                if len(w_clause) == 5: # Substring
                    resulting_string = f"substring({resulting_string}, {w_clause[3]}, {w_clause[4]})"
                arr_result.append(f"{resulting_string} {simple_operators.get(w_clause[0].upper()[:2])} '{w_clause[2]}'")
            elif w_clause[0].upper() in ['EQLPONSTR', 'NELPONSTR', 'LELPONSTR', 'GELPONSTR', 'LTLPONSTR', 'GTLPONSTR']:
                # Digits-only as above; with 7 tokens the digits are
                # left-padded (tokens 3, 4) and then substringed (tokens 5, 6).
                resulting_string = f"regexp_replace(CAST({w_clause[1]} AS STRING), '[^[:digit:]]','')"
                if len(w_clause) == 7: # Substring
                    resulting_string = f"substring(LPAD({resulting_string}, {w_clause[3]}, '{w_clause[4]}'), {w_clause[5]}, {w_clause[6]})"
                arr_result.append(f"{resulting_string} {simple_operators.get(w_clause[0].upper()[:2])} '{w_clause[2]}'")
            elif w_clause[0].upper() in ['EQSTR', 'NESTR', 'LESTR', 'GESTR', 'LTSTR', 'GTSTR']:
                # Substring of the field cast to string; the value is emitted
                # unquoted. Requires tokens 3 and 4 (IndexError otherwise).
                arr_result.append(f"substring(CAST({w_clause[1]} AS STRING), {w_clause[3]}, {w_clause[4]}) {simple_operators.get(w_clause[0].upper()[:2])} {w_clause[2]}")
            elif w_clause[0].upper() in ['EQLPSTR', 'NELPSTR', 'LELPSTR', 'GELPSTR', 'LTLPSTR', 'GTLPSTR']:
                # Left-pad the field (tokens 3, 4), then substring (tokens 5, 6).
                arr_result.append(f"substring(LPAD(CAST({w_clause[1]} AS VARCHAR({w_clause[3]})), {w_clause[3]}, '{w_clause[4]}'), {w_clause[5]}, {w_clause[6]}) {simple_operators.get(w_clause[0].upper()[:2])} {w_clause[2]}")
            elif w_clause[0].upper() in ['EQLPINT', 'NELPINT', 'LELPINT', 'GELPINT', 'LTLPINT', 'GTLPINT']:
                # Same as the padded substring, with the result cast to INTEGER.
                arr_result.append(f"CAST(substring(LPAD(CAST({w_clause[1]} AS VARCHAR({w_clause[3]})), {w_clause[3]}, '{w_clause[4]}'), {w_clause[5]}, {w_clause[6]}) AS INTEGER) {simple_operators.get(w_clause[0].upper()[:2])} {w_clause[2]}")
            elif w_clause[0].upper() in ['EQSZ', 'NESZ', 'LESZ', 'GESZ', 'LTSZ', 'GTSZ']:
                # Compare the length of the field cast to string.
                arr_result.append(f"LENGTH(CAST({w_clause[1]} AS STRING)) {simple_operators.get(w_clause[0].upper()[:2])} {w_clause[2]}")
    return ' '.join(arr_result)
def test_some_joined_some_as(self):
    """transform_joined_categorias strips the '_mun' suffix and applies aliases.

    The input mixes aliased and plain fields, with and without the suffix.
    """
    transformed = QueryBuilder.transform_joined_categorias(
        ['nm_indicador-nome', 'vl_indicador_mun-valor', 'ds_indicador', 'cd_indicador_mun'],
        '_mun'
    )
    self.assertEqual(
        transformed,
        ['nm_indicador AS nome', 'vl_indicador AS valor', 'ds_indicador', 'cd_indicador']
    )
def build_order_string(ordenacao=None):
    """Build the ORDER BY clause from a list of field names.

    A field prefixed with ``-`` is sorted in descending order; the dash is
    removed and ``DESC`` appended. All other fields are emitted unchanged.

    Args:
        ordenacao: iterable of field names, or None/empty for no ordering.

    Returns:
        ``'ORDER BY f1, f2 DESC, ...'``, or an empty string when there is
        nothing to order by.

    Raises:
        ValueError: if ``QueryBuilder.validate_field_array`` rejects the fields.
    """
    if not ordenacao:
        return ''
    if not QueryBuilder.validate_field_array(ordenacao):
        raise ValueError('Invalid aggregation')
    # Only a *leading* dash marks descending order. The previous check
    # (`"-" in field`) also matched dashes elsewhere in the name and then
    # dropped the first character of the field via field[1:].
    terms = [
        f'{field[1:]} DESC' if field.startswith('-') else field
        for field in ordenacao
    ]
    return 'ORDER BY ' + ', '.join(terms)
def build_filter_string(self, where=None, joined=None, is_on=False):
    """Build the WHERE clause body, or the extra conditions of an ON clause.

    Each entry of ``where`` is a dash-separated clause (an escaped dash,
    written as backslash + dash, stays inside its token). ``AND``/``OR``
    entries are copied through as typed; any other clause is rendered by
    ``self.build_criteria`` when ``QueryBuilder.validate_clause`` accepts it,
    and dropped when it does not or when the rendered criteria is falsy.

    Returns:
        The fragments joined by single spaces; an empty string when
        ``where`` is None or when ``is_on`` is set without ``joined``.
    """
    suffix = '' if joined is None else self.get_join_suffix(joined)
    if where is None or (joined is None and is_on):
        return ''

    fragments = []
    for raw_clause in where:
        # Protect escaped dashes with '|' during the split, then restore them.
        protected = raw_clause.replace('\\-', '|')
        tokens = [piece.replace('|', '-') for piece in protected.split('-')]

        if tokens[0].upper() in ('AND', 'OR'):
            fragments.append(tokens[0])
        elif QueryBuilder.validate_clause(tokens, joined, is_on, suffix):
            criteria = self.build_criteria(tokens)
            if criteria:
                fragments.append(criteria)
    return ' '.join(fragments)
def test_validate_positive_complex(self):
    """catch_injection returns True for a category aliased with 'truncate'."""
    payload = {"categorias": ["vl_indicador", "cd_ibge-truncate"]}
    self.assertEqual(QueryBuilder.catch_injection(payload), True)
def test_validate_semi_transform(self):
    """transform_categorias aliases only the fields that carry a '-alias' part."""
    self.assertEqual(
        QueryBuilder.transform_categorias(['vl_indicador-valor', 'cd_ibge']),
        ['vl_indicador AS valor', 'cd_ibge']
    )
def test_prepend_aggregations_empty(self):
    """prepend_aggregations returns an empty list for ['SUM', 'MAX']."""
    self.assertEqual(QueryBuilder.prepend_aggregations(['SUM', 'MAX']), [])
def test_validate_negative_null(self):
    """catch_injection returns False when an option value is None."""
    payload = {"categorias": ["vl_indicador", "cd_ibge"], "valor": None}
    self.assertEqual(QueryBuilder.catch_injection(payload), False)
def test_valid_no_join(self):
    """validate_clause accepts a clause when no join is involved."""
    clause = ['eq', 'any']
    self.assertEqual(QueryBuilder.validate_clause(clause, None, None, None), True)
def test_with_distinct(self):
    """build_grouping_string returns '' when DISTINCT is among the aggregations."""
    grouping = QueryBuilder.build_grouping_string(
        ['nm_indicador-nome', 'nu_competencia'],
        ['SUM', 'MAX', 'DISTINCT']
    )
    self.assertEqual(grouping, '')
def test_validate_negative(self):
    """catch_injection returns False for plain field names."""
    payload = {"categorias": ["vl_indicador", "cd_ibge"]}
    self.assertEqual(QueryBuilder.catch_injection(payload), False)
def test_validate_positive(self):
    """validate_field_array returns False for a field containing ';'."""
    self.assertEqual(
        QueryBuilder.validate_field_array(['vl_indicador', 'cd_ibge;select']),
        False
    )
def test_valid_join_com_sufixo(self):
    """validate_clause accepts a suffixed field as a join (ON) filter."""
    accepted = QueryBuilder.validate_clause(
        ['eq', 'any_mun'], 'municipio', True, '_mun')
    self.assertEqual(accepted, True)
def test_prepend_aggregations_valid(self):
    """prepend_aggregations returns only DISTINCT from ['SUM', 'DISTINCT', 'MAX']."""
    self.assertEqual(
        QueryBuilder.prepend_aggregations(['SUM', 'DISTINCT', 'MAX']),
        ['DISTINCT']
    )
def test_invalid_join_sem_sufixo(self):
    """validate_clause rejects an unsuffixed field as a join (ON) filter."""
    accepted = QueryBuilder.validate_clause(
        ['eq', 'any'], 'municipio', True, '_mun')
    self.assertEqual(accepted, False)
def test_prepend_aggregations_void(self):
    """prepend_aggregations returns an empty list for an empty input list."""
    self.assertEqual(QueryBuilder.prepend_aggregations([]), [])
def test_invalid_out_join_sufixo(self):
    """validate_clause rejects a suffixed field outside the join (is_on=False)."""
    accepted = QueryBuilder.validate_clause(
        ['eq', 'any_mun'], 'municipio', False, '_mun')
    self.assertEqual(accepted, False)