def get_unique_values(self, tbl: LuxSQLTable):
    """
    Gather the distinct, non-NULL values of every column in the SQL table
    behind ``tbl`` and store them on ``tbl.unique_values``.

    One ``SELECT Distinct(...)`` query is issued per column through
    ``lux.config.SQLconnection``.

    Parameters
    ----------
    tbl: lux.LuxSQLTable
        lux.LuxSQLTable object whose metadata will be calculated

    Returns
    -------
    None
    """

    def _distinct_for(column):
        # Build and run the per-column query; NULLs are excluded by the WHERE clause.
        sql = 'SELECT Distinct("{}") FROM {} WHERE "{}" IS NOT NULL'.format(
            column, tbl.table_name, column
        )
        frame = pandas.read_sql(sql, lux.config.SQLconnection)
        # The result frame is indexed by the column's own name.
        return list(frame[column])

    # Assigned only after every column has been queried, so a failing query
    # leaves the previous tbl.unique_values untouched.
    tbl.unique_values = {column: _distinct_for(column) for column in list(tbl.columns)}
def compute_stats(self, tbl: LuxSQLTable):
    """
    Precompute summary metadata for the SQL table behind ``tbl``.

    Populates, in this order: ``tbl._length`` (from the ``length_query``
    template), ``tbl.unique_values`` (via ``self.get_unique_values``) and
    ``tbl._min_max`` (a ``(min, max)`` tuple for every column whose entry in
    ``tbl._data_type`` is ``"quantitative"``, from the ``min_max_query``
    template).

    Parameters
    ----------
    tbl: lux.LuxSQLTable
        lux.LuxSQLTable object whose metadata will be calculated

    Returns
    -------
    None
    """
    # Reset the cached metadata before any query is issued.
    tbl.unique_values = {}
    tbl._min_max = {}

    # Row count of the whole table (empty where clause).
    length_sql = lux.config.query_templates["length_query"].format(
        table_name=tbl.table_name, where_clause=""
    )
    length_frame = pandas.read_sql(length_sql, lux.config.SQLconnection)
    tbl._length = list(length_frame["length"])[0]

    self.get_unique_values(tbl)

    for column in tbl.columns:
        # Only quantitative columns get a min/max range.
        if tbl._data_type[column] != "quantitative":
            continue
        range_sql = lux.config.query_templates["min_max_query"].format(
            attribute=column, table_name=tbl.table_name
        )
        range_frame = pandas.read_sql(range_sql, lux.config.SQLconnection)
        lowest = list(range_frame["min"])[0]
        highest = list(range_frame["max"])[0]
        tbl._min_max[column] = (lowest, highest)
def get_unique_values(self, tbl: LuxSQLTable):
    """
    Gather the unique values of every column in the SQL table behind ``tbl``
    and store them on ``tbl.unique_values``.

    One query per column is rendered from the ``unique_query`` entry of
    ``lux.config.query_templates`` and executed through
    ``lux.config.SQLconnection``.

    Parameters
    ----------
    tbl: lux.LuxSQLTable
        lux.LuxSQLTable object whose metadata will be calculated

    Returns
    -------
    None
    """

    def _distinct_for(column):
        # Render the configured template for this column and run it.
        sql = lux.config.query_templates["unique_query"].format(
            attribute=column, table_name=tbl.table_name
        )
        frame = pandas.read_sql(sql, lux.config.SQLconnection)
        # The result frame is indexed by the column's own name.
        return list(frame[column])

    # Assigned only after every column has been queried, so a failing query
    # leaves the previous tbl.unique_values untouched.
    tbl.unique_values = {column: _distinct_for(column) for column in list(tbl.columns)}
def compute_stats(self, tbl: LuxSQLTable):
    """
    Precompute summary metadata for the SQL table behind ``tbl``.

    Populates, in this order: ``tbl._length`` (row count from a
    ``COUNT(1)`` query), ``tbl.unique_values`` (via
    ``self.get_unique_values``) and ``tbl._min_max`` (a ``(min, max)`` tuple
    for every column whose entry in ``tbl._data_type`` is
    ``"quantitative"``).

    Parameters
    ----------
    tbl: lux.LuxSQLTable
        lux.LuxSQLTable object whose metadata will be calculated

    Returns
    -------
    None
    """
    # Reset the cached metadata before any query is issued.
    tbl.unique_values = {}
    tbl._min_max = {}

    # Row count of the whole table.
    length_sql = "SELECT COUNT(1) as length FROM {}".format(tbl.table_name)
    length_frame = pandas.read_sql(length_sql, lux.config.SQLconnection)
    tbl._length = list(length_frame["length"])[0]

    self.get_unique_values(tbl)

    for column in tbl.columns:
        # Only quantitative columns get a min/max range.
        if tbl._data_type[column] != "quantitative":
            continue
        range_sql = 'SELECT MIN("{}") as min, MAX("{}") as max FROM {}'.format(
            column, column, tbl.table_name
        )
        range_frame = pandas.read_sql(range_sql, lux.config.SQLconnection)
        lowest = list(range_frame["min"])[0]
        highest = list(range_frame["max"])[0]
        tbl._min_max[column] = (lowest, highest)