def get_statistics(self, options):
    """Get statistics for a company using Impala, one entry per dataset.

    Args:
        options: Request options (dict-like). Keys read here:
            ``column_family`` - when truthy, only that dataset is queried;
            otherwise every name in ``self.TOPICS`` is queried.
            ``column`` - must be present for the ``auto`` and ``catweb``
            datasets (only its presence is checked here).
            ``perspective`` - when truthy, restricts a multi-perspective
            dataset to that single perspective.

    Returns:
        dict: Maps each dataset name to the metadata returned by
        ``find_dataset`` extended with either ``stats_persp`` (a dict of
        per-perspective statistics) or ``stats`` plus the grouped statistics
        from ``self.get_grouped_stats``.

    Raises:
        AttributeError: If ``auto`` or ``catweb`` is requested without a
            ``column`` key in ``options``.
    """
    if options.get('column_family'):
        dataframes = [options.get('column_family')]
    else:
        dataframes = self.TOPICS  # TODO 1 - Check if tables and topics names match

    # TODO 99 - Add threads to run impala queries
    result = {}
    thematic_handler = Thematic()
    for df in dataframes:
        # Get statistics for dataset
        cols = thematic_handler.get_column_defs(df)

        # Autos and Catweb need a timeframe to filter
        if df in ['auto', 'catweb'] and 'column' not in options:
            raise AttributeError(f'{df} demanda uma competência')

        # A dict-valued 'cnpj_raiz' definition means the dataset doesn't have
        # a unique column to identify a company. Perspective values are
        # fetched once and only in that case.
        has_column_map = isinstance(cols.get('cnpj_raiz'), dict)
        perspectives = thematic_handler.get_persp_values(df) if has_column_map else None

        if perspectives:
            wanted_persp = options.get('perspective')
            if wanted_persp:
                perspectives = {k: v for k, v in perspectives.items() if k == wanted_persp}

            local_result = {}
            for each_persp_key, each_persp_value in perspectives.items():
                local_cols = thematic_handler.decode_column_defs(cols, df, each_persp_key)
                local_options = self.get_stats_local_options(options, local_cols, df, each_persp_key)
                if df != 'catweb':
                    # Every dataset except catweb gets an extra filter
                    # clause on its perspective column.
                    local_options["where"].append("and")
                    local_options["where"].append(
                        f"eq-{thematic_handler.get_persp_columns(df)}-{each_persp_value}"
                    )
                base_stats = thematic_handler.find_dataset(local_options)

                # Metadata is taken from the first perspective queried.
                if df not in result:
                    result[df] = base_stats.get('metadata')

                if base_stats.get('dataset', []):
                    persp_stats = base_stats.get('dataset')[0]
                else:
                    persp_stats = self.build_empty_stats(local_options, local_cols, options)
                local_result[each_persp_key] = {
                    **persp_stats,
                    **self.get_grouped_stats(thematic_handler, options, local_options, local_cols)
                }

            # The entry may be absent (requested perspective matched nothing,
            # so the loop never ran) or None (no 'metadata' in the response).
            # Fall back to an empty dict instead of failing on the assignment.
            if result.get(df) is None:
                result[df] = {}
            result[df]['stats_persp'] = local_result
        else:
            local_cols = cols.copy()
            if has_column_map:
                local_cols = thematic_handler.decode_column_defs(local_cols, df, options.get('perspective'))
            local_options = self.get_stats_local_options(options, local_cols, df, options.get('perspective'))
            base_stats = thematic_handler.find_dataset(local_options)

            # Tolerate a response without 'metadata' instead of failing on
            # the item assignment below.
            metadata = base_stats.get('metadata')
            if metadata is None:
                metadata = {}
            result[df] = metadata

            if base_stats.get('dataset', []):
                result[df]["stats"] = base_stats.get('dataset')[0]
            else:
                result[df]["stats"] = self.build_empty_stats(local_options, local_cols, options)
            # NOTE(review): grouped stats receive the raw `cols` here, while
            # the perspective branch passes the decoded columns - confirm
            # this difference is intentional.
            result[df] = {**result[df], **self.get_grouped_stats(thematic_handler, options, local_options, cols)}
    return result
def get_dataframe(self, options, struct=None, added_options=None):
    """Query the thematic model with options derived from a view configuration.

    Args:
        options: Request options (dict-like). When ``operation`` is truthy
            the query goes through ``Thematic.find_and_operate`` with that
            operation; otherwise through ``Thematic.find_dataset``.
        struct: Optional mapping whose ``api`` entry is handed to
            ``ViewConfReader.api_to_options``. Defaults to an empty mapping.
        added_options: Optional extra options merged over ``options``
            (its keys win on conflict) before the conversion. Defaults to
            an empty mapping.

    Returns:
        Whatever the selected ``Thematic`` call returns. The query is sent
        with ``as_pandas`` and ``no_wrap`` set to True unless the converted
        options override them - presumably yielding a pandas DataFrame;
        confirm against ``Thematic``.
    """
    # None sentinels replace the former mutable `{}` defaults, which were
    # shared between calls.
    struct = {} if struct is None else struct
    added_options = {} if added_options is None else added_options

    operation = options.get('operation')
    handler = Thematic()
    # Built once and shared by both query paths. The converted options are
    # spread last so they take precedence over the two default flags.
    query_options = {
        'as_pandas': True,
        'no_wrap': True,
        **ViewConfReader.api_to_options(
            struct.get('api'),
            {**options, **added_options}
        )
    }
    if operation:
        return handler.find_and_operate(operation, query_options)
    return handler.find_dataset(query_options)
def set_domain(self):
    """Bind a newly created ``Thematic`` instance to ``self.domain``.

    Per the original note, this setter is meant to be invoked from the
    constructor.
    """
    domain_model = Thematic()
    self.domain = domain_model
def get_domain(self):
    """Return the domain model, creating a ``Thematic`` one if none is set."""
    current = self.domain
    if current is not None:
        return current
    # Nothing assigned yet: build the model, keep it on the instance and
    # hand it back.
    self.domain = Thematic()
    return self.domain
def __init__(self):
    """Constructor: gives the instance its own ``Thematic`` domain model."""
    domain_model = Thematic()
    self.domain = domain_model
def get_thematic_handler(self):
    """Return the ``Thematic`` handler used to delegate query execution.

    The handler is created on first use and stored on the instance, so
    later calls return the same object.
    """
    handler = self.thematic_handler
    if handler is not None:
        return handler
    self.thematic_handler = Thematic()
    return self.thematic_handler