def fit(self, input_relation: str, X: list):
     self.input_relation = input_relation
     self.X = [str_column(column) for column in X]
     query = "SELECT PCA('{}', '{}', '{}' USING PARAMETERS scale = {}, method = '{}'"
     query = query.format(self.name, input_relation, ", ".join(self.X),
                          self.scale, self.method)
     if (self.n_components):
         query += ", num_components = {}".format(self.n_components)
     query += ")"
     self.cursor.execute(query)
     self.components = to_tablesample(
         query=
         "SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'principal_components')"
         .format(self.name),
         cursor=self.cursor)
     self.components.table_info = False
     self.explained_variance = to_tablesample(
         query=
         "SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'singular_values')"
         .format(self.name),
         cursor=self.cursor)
     self.explained_variance.table_info = False
     self.mean = to_tablesample(
         query=
         "SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'columns')"
         .format(self.name),
         cursor=self.cursor)
     self.mean.table_info = False
     return (self)
Esempio n. 2
0
	def fit(self, input_relation: str, X: list):
		self.input_relation = input_relation
		self.X = [str_column(column) for column in X]
		query = "SELECT ONE_HOT_ENCODER_FIT('{}', '{}', '{}' USING PARAMETERS extra_levels = '{}')".format(self.name, input_relation, ", ".join(self.X), self.extra_levels)
		self.cursor.execute(query)
		try:
			self.param = to_tablesample(query = "SELECT category_name, category_level::varchar, category_level_index FROM (SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'integer_categories')) x UNION ALL SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'varchar_categories')".format(self.name, self.name), cursor = self.cursor)
		except:
			try:
				self.param = to_tablesample(query = "SELECT category_name, category_level::varchar, category_level_index FROM (SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'integer_categories')) x".format(self.name), cursor = self.cursor)
			except:
				self.param = to_tablesample(query = "SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'varchar_categories')".format(self.name), cursor = self.cursor)
		self.param.table_info = False
		return (self)
Esempio n. 3
0
def multilabel_confusion_matrix(y_true: str, 
								y_score: str, 
								input_relation: str,
								cursor,
								labels: list):
	num_classes = str(len(labels))
	query = "SELECT CONFUSION_MATRIX(obs, response USING PARAMETERS num_classes = {}) OVER() FROM (SELECT DECODE({}".format(num_classes, y_true) 
	for idx, item in enumerate(labels):
		query += ", '" + str(item) + "', " + str(idx)
	query += ") AS obs, DECODE({}".format(y_score)
	for idx,item in enumerate(labels):
		query += ", '" + str(item) + "', " + str(idx)
	query += ") AS response FROM {}) x".format(input_relation)
	result = to_tablesample(query, cursor)
	result.table_info = False
	del (result.values["comment"])
	result = result.transpose()
	result.values["actual_class"] = labels
	result = result.transpose()
	matrix = {"index": labels}
	for elem in result.values:
		if (elem != "actual_class"):
			matrix[elem] = result.values[elem]
	result.values = matrix
	return (result)
Esempio n. 4
0
 def fit(self,
         input_relation: str,
         X: list,
         y: str,
         test_relation: str = ""):
     self.input_relation = input_relation
     self.test_relation = test_relation if (
         test_relation) else input_relation
     self.X = [str_column(column) for column in X]
     self.y = str_column(y)
     query = "SELECT SVM_REGRESSOR('{}', '{}', '{}', '{}' USING PARAMETERS C = {}, epsilon = {}, max_iterations = {}"
     query = query.format(self.name, input_relation, self.y,
                          ", ".join(self.X), self.C, self.tol,
                          self.max_iter)
     query += ", error_tolerance = {}".format(self.acceptable_error_margin)
     if (self.fit_intercept):
         query += ", intercept_mode = '{}', intercept_scaling = {}".format(
             self.intercept_mode, self.intercept_scaling)
     query += ")"
     self.cursor.execute(query)
     self.coef = to_tablesample(
         query=
         "SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'details')"
         .format(self.name),
         cursor=self.cursor)
     self.coef.table_info = False
     return (self)
Esempio n. 5
0
	def fit(self, input_relation: str, X: list):
		self.input_relation = input_relation
		self.X = [str_column(column) for column in X]
		query = "SELECT NORMALIZE_FIT('{}', '{}', '{}', '{}')".format(self.name, input_relation, ", ".join(self.X), self.method)
		self.cursor.execute(query)
		self.param = to_tablesample(query = "SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'details')".format(self.name), cursor = self.cursor)
		self.param.table_info = False
		return (self)
Esempio n. 6
0
	def fit(self, input_relation: str, X: list):
		self.input_relation = input_relation
		self.X = [str_column(column) for column in X]
		query = "SELECT KMEANS('{}', '{}', '{}', {} USING PARAMETERS max_iterations = {}, epsilon = {}".format(self.name, input_relation, ", ".join(self.X), self.n_cluster, self.max_iter, self.tol)
		name = "_vpython_kmeans_initial_centers_table_" 
		schema, relation = schema_relation(input_relation)
		schema = str_column(schema)
		if (type(self.init) != str):
			self.cursor.execute("DROP TABLE IF EXISTS {}.{}".format(schema, name))
			if (len(self.init) != self.n_cluster):
				raise ValueError("'init' must be a list of 'n_cluster' = {} points".format(self.n_cluster))
			else:
				for item in self.init:
					if (len(X) != len(item)):
						raise ValueError("Each points of 'init' must be of size len(X) = {}".format(len(self.X)))
				temp_initial_centers = [item for item in self.init]
				for item in temp_initial_centers:
					del temp_initial_centers[0]
					if (item in temp_initial_centers):
						raise ValueError("All the points of 'init' must be different")
				query0 = []
				for i in range(len(self.init)):
					line = []
					for j in range(len(self.init[0])):
						line += [str(self.init[i][j]) + " AS " + X[j]]
					line = ",".join(line)
					query0 += ["SELECT " + line]
				query0 = " UNION ".join(query0)
				query0 = "CREATE TEMPORARY TABLE {}.{} ON COMMIT PRESERVE ROWS AS {}".format(schema, name, query0)
				self.cursor.execute(query0)
				query += ", initial_centers_table = '" + name + "'"
		else:
			query += ", init_method = '" + self.init + "'"
		query += ")"
		self.cursor.execute(query)
		self.cursor.execute("DROP TABLE IF EXISTS {}.{}".format(schema, name))
		self.cluster_centers = to_tablesample(query = "SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'centers')".format(self.name), cursor = self.cursor)
		self.cluster_centers.table_info = False
		query = "SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'metrics')".format(self.name)
		self.cursor.execute(query)
		result = self.cursor.fetchone()[0]
		values = {"index": ["Between-Cluster Sum of Squares", "Total Sum of Squares", "Total Within-Cluster Sum of Squares", "Between-Cluster SS / Total SS", "converged"]}
		values["value"] = [float(result.split("Between-Cluster Sum of Squares: ")[1].split("\n")[0]), float(result.split("Total Sum of Squares: ")[1].split("\n")[0]), float(result.split("Total Within-Cluster Sum of Squares: ")[1].split("\n")[0]), float(result.split("Between-Cluster Sum of Squares: ")[1].split("\n")[0]) / float(result.split("Total Sum of Squares: ")[1].split("\n")[0]), result.split("Converged: ")[1].split("\n")[0] == "True"] 
		self.metrics = tablesample(values, table_info = False)
		return (self)
Esempio n. 7
0
	def fit(self,
			input_relation: str, 
			X: list, 
			y: str,
			test_relation: str = ""):
		self.input_relation = input_relation
		self.test_relation = test_relation if (test_relation) else input_relation
		self.X = [str_column(column) for column in X]
		self.y = str_column(y)
		query = "SELECT LOGISTIC_REG('{}', '{}', '{}', '{}' USING PARAMETERS optimizer = '{}', epsilon = {}, max_iterations = {}"
		query = query.format(self.name, input_relation, self.y, ", ".join(self.X), self.solver, self.tol, self.max_iter)
		query += ", regularization = '{}', lambda = {}".format(self.penalty, self.C)
		if (self.penalty == 'ENet'):
			query += ", alpha = {}".format(self.l1_ratio)
		query += ")"
		self.cursor.execute(query)
		self.coef = to_tablesample(query = "SELECT GET_MODEL_ATTRIBUTE(USING PARAMETERS model_name = '{}', attr_name = 'details')".format(self.name), cursor = self.cursor)
		self.coef.table_info = False
		return (self)
Esempio n. 8
0
 def fit(self,
         input_relation: str,
         X: list,
         y: str,
         test_relation: str = ""):
     func = "APPROXIMATE_MEDIAN" if (self.p == 1) else "AVG"
     self.input_relation = input_relation
     self.test_relation = test_relation if (
         test_relation) else input_relation
     self.X = [str_column(column) for column in X]
     self.y = str_column(y)
     query = "SELECT {}, {} FROM {} WHERE {} IS NOT NULL GROUP BY {}".format(
         ", ".join([
             "{}({}) AS {}".format(func, column, column)
             for column in self.X
         ]), self.y, input_relation, self.y, self.y)
     self.centroids = to_tablesample(query=query, cursor=self.cursor)
     self.centroids.table_info = False
     self.classes = self.centroids.values[y]
     return (self)
Esempio n. 9
0
def confusion_matrix(y_true: str, 
					 y_score: str, 
					 input_relation: str,
					 cursor,
					 pos_label = 1):
	query  = "SELECT CONFUSION_MATRIX(obs, response USING PARAMETERS num_classes = 2) OVER() FROM (SELECT DECODE({}".format(y_true) 
	query += ", '{}', 1, NULL, NULL, 0) AS obs, DECODE({}, '{}', 1, NULL, NULL, 0) AS response FROM {}) x".format(pos_label, y_score, pos_label, input_relation) 
	result = to_tablesample(query, cursor)
	if (pos_label == 1):
		labels = [0, 1]
	else:
		labels = ["Non-{}".format(pos_label), pos_label]
	result.table_info = False
	del (result.values["comment"])
	result = result.transpose()
	result.values["actual_class"] = labels
	result = result.transpose()
	matrix = {"index": labels}
	for elem in result.values:
		if (elem != "actual_class"):
			matrix[elem] = result.values[elem]
	result.values = matrix
	return (result)
Esempio n. 10
0
 def get_tree(self, tree_id: int = 0):
     query = "SELECT READ_TREE ( USING PARAMETERS model_name = '{}', tree_id = {}, format = 'tabular');".format(
         self.name, tree_id)
     result = to_tablesample(query=query, cursor=self.cursor)
     result.table_info = False
     return (result)