예제 #1
0
    def get_unique_values(self, tbl: LuxSQLTable):
        """
        Collect the distinct non-null values of every column of a SQL-backed table.

        One ``SELECT Distinct`` query is issued per column through
        ``lux.config.SQLconnection``. The results are stored on
        ``tbl.unique_values`` as a dict mapping each column name to the list
        of values returned for it.

        Parameters
        ----------
        tbl: lux.LuxSQLTable
            Table whose ``columns`` and ``table_name`` drive the queries and
            whose ``unique_values`` attribute is replaced.

        Returns
        -------
        None
        """
        template = 'SELECT Distinct("{}") FROM {} WHERE "{}" IS NOT NULL'
        distinct_by_column = {}
        # Snapshot the column labels before running any query.
        for column in tuple(tbl.columns):
            sql = template.format(column, tbl.table_name, column)
            frame = pandas.read_sql(sql, lux.config.SQLconnection)
            distinct_by_column[column] = list(frame[column])
        # Publish the mapping only once every column has been queried.
        tbl.unique_values = distinct_by_column
예제 #2
0
    def compute_stats(self, tbl: LuxSQLTable):
        """
        Precompute summary metadata for a SQL-backed table and store it on ``tbl``.

        Three pieces of metadata are populated, in this order:

        * ``tbl._length`` from the ``length`` column of the configured
          ``length_query`` template (formatted with an empty where clause),
        * ``tbl.unique_values`` via ``self.get_unique_values``,
        * ``tbl._min_max`` as ``(min, max)`` tuples, from the configured
          ``min_max_query`` template, for every column whose entry in
          ``tbl._data_type`` is ``"quantitative"``.

        Parameters
        ----------
        tbl: lux.LuxSQLTable
            lux.LuxSQLTable object whose metadata will be calculated

        Returns
        -------
        None
        """
        # Reset the cached metadata before recomputing it.
        tbl.unique_values = {}
        tbl._min_max = {}

        count_sql = lux.config.query_templates["length_query"].format(
            table_name=tbl.table_name, where_clause=""
        )
        count_frame = pandas.read_sql(count_sql, lux.config.SQLconnection)
        tbl._length = list(count_frame["length"])[0]

        self.get_unique_values(tbl)

        for attribute in tbl.columns:
            # Min/max are only recorded for quantitative columns.
            if tbl._data_type[attribute] != "quantitative":
                continue
            bounds_sql = lux.config.query_templates["min_max_query"].format(
                attribute=attribute, table_name=tbl.table_name
            )
            bounds = pandas.read_sql(bounds_sql, lux.config.SQLconnection)
            lowest = list(bounds["min"])[0]
            highest = list(bounds["max"])[0]
            tbl._min_max[attribute] = (lowest, highest)
예제 #3
0
    def get_unique_values(self, tbl: LuxSQLTable):
        """
        Collect the per-column values returned by the configured ``unique_query``.

        For each column of ``tbl`` the ``unique_query`` template from
        ``lux.config.query_templates`` is formatted with the column and table
        name and executed through ``lux.config.SQLconnection``. The results are
        stored on ``tbl.unique_values`` as a dict mapping each column name to
        the list of values returned for it (presumably its distinct values;
        the template text is defined outside this method).

        Parameters
        ----------
        tbl: lux.LuxSQLTable
            Table whose ``columns`` and ``table_name`` drive the queries and
            whose ``unique_values`` attribute is replaced.

        Returns
        -------
        None
        """
        values_by_column = {}
        # Snapshot the column labels before running any query.
        for column in tuple(tbl.columns):
            sql = lux.config.query_templates["unique_query"].format(
                attribute=column, table_name=tbl.table_name
            )
            frame = pandas.read_sql(sql, lux.config.SQLconnection)
            values_by_column[column] = list(frame[column])
        # Publish the mapping only once every column has been queried.
        tbl.unique_values = values_by_column
예제 #4
0
    def compute_stats(self, tbl: LuxSQLTable):
        """
        Precompute summary metadata for a SQL-backed table and store it on ``tbl``.

        Three pieces of metadata are populated, in this order:

        * ``tbl._length`` from a ``COUNT(1)`` query over the table,
        * ``tbl.unique_values`` via ``self.get_unique_values``,
        * ``tbl._min_max`` as ``(min, max)`` tuples, from a ``MIN``/``MAX``
          query, for every column whose entry in ``tbl._data_type`` is
          ``"quantitative"``.

        Parameters
        ----------
        tbl: lux.LuxSQLTable
            lux.LuxSQLTable object whose metadata will be calculated

        Returns
        -------
        None
        """
        # Reset the cached metadata before recomputing it.
        tbl.unique_values = {}
        tbl._min_max = {}

        count_sql = "SELECT COUNT(1) as length FROM {}".format(tbl.table_name)
        count_frame = pandas.read_sql(count_sql, lux.config.SQLconnection)
        tbl._length = list(count_frame["length"])[0]

        self.get_unique_values(tbl)

        bounds_template = 'SELECT MIN("{}") as min, MAX("{}") as max FROM {}'
        for attribute in tbl.columns:
            # Min/max are only recorded for quantitative columns.
            if tbl._data_type[attribute] != "quantitative":
                continue
            bounds_sql = bounds_template.format(attribute, attribute, tbl.table_name)
            bounds = pandas.read_sql(bounds_sql, lux.config.SQLconnection)
            lowest = list(bounds["min"])[0]
            highest = list(bounds["max"])[0]
            tbl._min_max[attribute] = (lowest, highest)