def test_vDF_pivot_table_chi2(self, titanic_vd): result = titanic_vd.pivot_table_chi2("survived") assert result["chi2"][0] == pytest.approx(345.12775126385327) assert result["chi2"][1] == pytest.approx(139.2026595859198) assert result["chi2"][2] == pytest.approx(103.48373526643627) assert result["chi2"][3] == pytest.approx(53.47413727076916) assert result["chi2"][4] == pytest.approx(44.074789072247576) assert result["chi2"][5] == pytest.approx(42.65927519250441) assert result["chi2"][6] == pytest.approx(34.54227539207669) assert result["chi2"][7] == pytest.approx(-2.6201263381153694e-14) assert result["p_value"][0] == pytest.approx(4.8771014178794746e-77) assert result["p_value"][1] == pytest.approx(6.466448884215497e-32) assert result["p_value"][2] == pytest.approx(5.922792773022131e-31) assert result["p_value"][3] == pytest.approx(2.9880194205753303e-09) assert result["p_value"][4] == pytest.approx(7.143900900120299e-08) assert result["p_value"][5] == pytest.approx(5.4532585750903e-10) assert result["p_value"][6] == pytest.approx(0.0028558258758971957) set_option("random_state", 0) result = titanic_vd.pivot_table_chi2("survived", method="smart") assert result["chi2"][0] == pytest.approx(345.12775126385327) assert result["chi2"][1] == pytest.approx(187.75090682844288) assert result["chi2"][2] == pytest.approx(139.2026595859198) assert result["chi2"][3] == pytest.approx(53.474137270768885) assert result["chi2"][4] == pytest.approx(44.074789072247576) assert result["chi2"][5] == pytest.approx(42.65927519250441) assert result["chi2"][6] == pytest.approx(38.109904618027755) assert result["chi2"][7] == pytest.approx(0.0)
# See the License for the specific language governing permissions and # limitations under the License. # Pytest import pytest # Standard Python Modules import warnings, os # VerticaPy import verticapy from verticapy import drop, set_option, tablesample from verticapy.datasets import load_titanic from verticapy.sql import sql set_option("print_info", False) @pytest.fixture(scope="module") def titanic_vd(): titanic = load_titanic() yield titanic drop(name="public.titanic") class TestSQL: def test_sql(self, titanic_vd): # SQL line Test -c result = sql(' -c "SELECT * FROM titanic;"', "") assert result.shape() == (1234, 14)
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import pytest, warnings from verticapy import vDataFrame from verticapy import drop_table from verticapy import set_option set_option("print_info", False) set_option("random_state", 0) @pytest.fixture(scope="module") def smart_meters_vd(base): from verticapy.learn.datasets import load_smart_meters smart_meters = load_smart_meters(cursor=base.cursor) yield smart_meters with warnings.catch_warnings(record=True) as w: drop_table( name="public.smart_meters", cursor=base.cursor, )
def sql(line, cell="", local_ns=None): # We don't want to display the query/time twice if the options are still on # So we save the previous configuration and turn them off. sql_on, time_on = verticapy.options["sql_on"], verticapy.options["time_on"] set_option("sql_on", False) set_option("time_on", False) try: # Initialization queries = "" if (not (cell) and (line)) else cell # Options options = {} all_options_dict = get_magic_options(line) for option in all_options_dict: if option.lower() in ( "-f", "--file", "-o", "--output", "-nrows", "-ncols", "-c", "--command", ): if option.lower() in ("-f", "--file"): if "-f" in options: raise ParameterError("Duplicate option '-f'.") options["-f"] = all_options_dict[option] elif option.lower() in ("-o", "--output"): if "-o" in options: raise ParameterError("Duplicate option '-o'.") options["-o"] = all_options_dict[option] elif option.lower() in ("-c", "--command"): if "-c" in options: raise ParameterError("Duplicate option '-c'.") options["-c"] = all_options_dict[option] elif option.lower() in ("-nrows", ): if "-nrows" in options: raise ParameterError("Duplicate option '-nrows'.") options["-nrows"] = int(all_options_dict[option]) elif option.lower() in ("-ncols", ): if "-ncols" in options: raise ParameterError("Duplicate option '-ncols'.") options["-ncols"] = int(all_options_dict[option]) elif verticapy.options["print_info"]: warning_message = ( f"\u26A0 Warning : The option '{option}' doesn't " "exist, it was skipped.") warnings.warn(warning_message, Warning) if "-f" in options and "-c" in options: raise ParameterError("Do not find which query to run: One of " "the options '-f' and '-c' must be empty.") if cell and ("-f" in options or "-c" in options): raise ParameterError( "Cell must be empty when using options '-f' or '-c'.") if "-f" in options: f = open(options["-f"], "r") queries = f.read() f.close() elif "-c" in options: queries = options["-c"] # Cleaning the Query queries = clean_query(queries) queries = replace_vars_in_query(queries, locals()["local_ns"]) n, i, all_split = len(queries), 0, [] while i < n and queries[n - i - 1] in (";", " ", "\n"): i += 1 queries = queries[:n - i] i, n = 0, n - i while i < n: if queries[i] == '"': i += 1 while i < n and queries[i] != '"': i += 1 elif queries[i] == "'": i += 1 while i < n and queries[i] != "'": i += 1 elif queries[i] == ";": all_split += [i] i += 1 all_split = [0] + all_split + [n] m = len(all_split) queries = [ queries[all_split[i]:all_split[i + 1]] for i in range(m - 1) ] n = len(queries) for i in range(n): query = queries[i] while len(query) > 0 and (query[-1] in (";", " ")): query = query[0:-1] while len(query) > 0 and (query[0] in (";", " ")): query = query[1:] queries[i] = query queries_tmp, i = [], 0 while i < n: query = queries[i] if (i < n - 1) and (queries[i + 1].lower() == "end"): query += "; {}".format(queries[i + 1]) i += 1 queries_tmp += [query] i += 1 queries, n = queries_tmp, len(queries_tmp) result, start_time = None, time.time() # Executing the Queries for i in range(n): query = queries[i] if query.split(" ")[0]: query_type = query.split(" ")[0].upper() else: query_type = query.split(" ")[1].upper() if len(query_type) > 1 and query_type[0:2] in ("/*", "--"): query_type = "undefined" if (query_type == "COPY") and ("from local" in query.lower()): query = re.split("from local", query, flags=re.IGNORECASE) if query[1].split(" ")[0]: file_name = query[1].split(" ")[0] else: file_name = query[1].split(" ")[1] query = ("".join(query[0]) + "FROM" + "".join(query[1]).replace(file_name, "STDIN")) if (file_name[0] == file_name[-1]) and (file_name[0] in ('"', "'")): file_name = file_name[1:-1] executeSQL(query, method="copy", path=file_name, print_time_sql=False) elif (i < n - 1) or ( (i == n - 1) and (query_type.lower() not in ("select", "with", "undefined"))): executeSQL(query, print_time_sql=False) if verticapy.options["print_info"]: print(query_type) else: error = "" try: result = vDataFrameSQL("({}) x".format(query)) # Display parameters if "-nrows" in options: result._VERTICAPY_VARIABLES_["max_rows"] = options[ "-nrows"] if "-ncols" in options: result._VERTICAPY_VARIABLES_["max_columns"] = options[ "-ncols"] except: try: final_result = executeSQL(query, method="fetchfirstelem", print_time_sql=False) if final_result and verticapy.options["print_info"]: print(final_result) elif verticapy.options["print_info"]: print(query_type) except Exception as e: error = e if error: raise QueryError(error) # Displaying the information elapsed_time = round(time.time() - start_time, 3) if verticapy.options["print_info"]: display(HTML(f"<div><b>Execution: </b> {elapsed_time}s</div>")) # Exporting the result if isinstance(result, vDataFrame) and "-o" in options: if options["-o"][-4:] == "json": result.to_json(options["-o"]) else: result.to_csv(options["-o"]) # we load the previous configuration before returning the result. set_option("sql_on", sql_on) set_option("time_on", time_on) return result except: # If it fails, we load the previous configuration before raising the error. set_option("sql_on", sql_on) set_option("time_on", time_on) raise