def test_two_tables(self):
    result = format({"select": "*", "from": ["XYZZY", "ABC"]})
    expected = "SELECT * FROM XYZZY, ABC"
    self.assertEqual(result, expected)
def test_no_arguments(self):
    result = format({"select": {"value": {"now": {}}}})
    expected = "SELECT NOW()"
    self.assertEqual(result, expected)
def test_issue_104(self):
    expected = (
        'SELECT NomPropriete AS Categorie, ROUND(AVG(NotePonderee), 2) AS "Moyenne'
        ' des notes", ROUND(AVG(Complexite), 2) AS "Complexite moyenne" FROM'
        " Propriete, Categorie, Jeu WHERE IdPropriete = IdCategorie AND"
        " Categorie.IdJeu = Jeu.IdJeu AND NotePonderee > 0 GROUP BY IdPropriete,"
        ' NomPropriete ORDER BY "Moyenne des notes" DESC,"Complexite moyenne" DESC'
    )
    result = format({
        "select": [
            {"value": "NomPropriete", "name": "Categorie"},
            {
                "value": {"round": [{"avg": "NotePonderee"}, 2]},
                "name": "Moyenne des notes",
            },
            {
                "value": {"round": [{"avg": "Complexite"}, 2]},
                "name": "Complexite moyenne",
            },
        ],
        "from": ["Propriete", "Categorie", "Jeu"],
        "where": {
            "and": [
                {"eq": ["IdPropriete", "IdCategorie"]},
                {"eq": ["Categorie.IdJeu", "Jeu.IdJeu"]},
                {"gt": ["NotePonderee", 0]},
            ]
        },
        "groupby": [{"value": "IdPropriete"}, {"value": "NomPropriete"}],
        "orderby": [
            {"value": "Moyenne des notes", "sort": "desc"},
            {"value": "Complexite moyenne", "sort": "desc"},
        ],
    })
    self.assertEqual(result, expected)
def test_limit(self):
    result = format({"select": "*", "from": "a", "limit": 10})
    expected = "SELECT * FROM a LIMIT 10"
    self.assertEqual(result, expected)
def opine_sql(self, sqlquery, mode='histogram'):
    """
    One test case input (query):

        SELECT h.name
        FROM hotel_amsterdam AS h
        WHERE h.price <= 15 AND h.opine = 'very clean room';

    expected output (pgsql):

        SELECT h.name,
               madlib.logregr_predict_prob(
                   generate_features_phrases(   -- user-defined aggregate
                       p2v('very clean room'),  -- numeric vectors are passed directly by the parser
                                                -- parser translates the query terms to vectors
                       pvec, senti, count),
                   M.coef) AS score
        FROM hotel_amsterdam AS h,
             hotel_amsterdam_phrase_model AS pm,  -- single row logistic regression model
             hotel_amsterdam_histogram AS c
        WHERE h.price <= 15
          AND c.attribute = %s(nearest_attr)
          AND h.name = c.hotel_name
        GROUP BY h.name
        ORDER BY score
    """
    # TODOs
    # DONE 1. for each row in table hotel_amsterdam_histogram, add pvec, senti value to the table
    # NEED CORRECTION!!! 2. finish self.translate
    # 3. write plpython aggregate for generate_feature
    # DONE 4. create phrase_model table and insert the coef into the table
    # 5. (after the deadline) add same for marker, allow multiple entity tables,
    #    remove the need to load histogram and extractions in memory, etc.

    # parser part
    parsed_sql, qterms, where_clause = self.sqlparser(sqlquery)
    nearest_attr = []
    qvec_list = []
    result_row = []

    import psycopg2
    conn = psycopg2.connect(
        "host=postgres dbname=postgres user=postgres port=5432")

    # no subjective query terms, only objective: run the query unchanged
    if len(qterms) == 0:
        pgsql = format(parsed_sql)
        cur = conn.cursor()
        cur.execute(pgsql)
        rows = cur.fetchall()
        # print(rows)
        result_row.append(rows)

    # compute each qterm column separately
    for qterm in qterms:
        qterm_sql = copy.deepcopy(parsed_sql)
        qterm_where_clause = copy.deepcopy(where_clause)
        qterm = qterm.lower()
        qvec = self.phrase2vec(qterm)
        qvec_list.append(qvec)
        attr_name, _ = self.interpret(qterm)
        nearest_attr.append(attr_name)
        print(qterm, attr_name)

        # translate
        pgsql = self.translate(qterm_sql, qterm, attr_name, qterm_where_clause)
        # print(pgsql)

        # plpython part
        cur = conn.cursor()
        cur.execute(pgsql)
        rows = cur.fetchall()
        result_row.append(rows)

    # aggregate logic: seed scores from the first result set, then multiply
    # in the score of every later result set that mentions the same name
    result = {}
    for idx, row in enumerate(result_row):
        if row is None:
            continue
        if idx == 0:
            for bid in row:
                if len(bid) > 1:
                    result[bid[0]] = bid[1]
                else:
                    result[bid[0]] = 1
        else:
            for bid in row:
                result[bid[0]] *= bid[1]

    # fuzzy logic now
    return sorted(result.items(), key=lambda x: -x[1])
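The objective-only branch above relies on the parser's parse/format round-trip: the query is turned into a JSON-like dict and re-emitted as SQL. A minimal sketch of that round-trip, assuming the `format` used above is `moz_sql_parser.format` (the table and predicate are illustrative):

```python
# Round-trip sketch; assumes format() in opine_sql is moz_sql_parser.format.
from moz_sql_parser import parse, format as format_sql

parsed = parse("SELECT h.name FROM hotel_amsterdam AS h WHERE h.price <= 15")
# parsed is a plain dict, e.g. the WHERE clause becomes {'lte': ['h.price', 15]},
# which a translate() step can rewrite before the query is re-serialized.
print(format_sql(parsed))
# roughly: SELECT h.name FROM hotel_amsterdam AS h WHERE h.price <= 15
```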
def filter(self, where=None, limit=None, get_col_map=False):
    try:
        parsed_query = moz_sql_parser.parse(
            self._query.replace('FORMAT JSON', ''))

        modified_columns = []
        for col, op, value in where or []:
            past_where_clause = parsed_query.get('where', {})

            op = op.lower()
            op_json = binary_ops.get(op, None)
            if op_json is None:
                print(
                    f"Operator: {op} not found in the sql parser operator list.\n"
                    "Using it anyway."
                )
                op_json = op

            if op == 'like':
                value = '%' + value.strip('%') + '%'
                if 'clickhouse' in self.name().lower():
                    col = f'toString({col})'
                elif 'postgres' in self.name().lower():
                    col = f'{col}::text'
                elif ('mariadb' in self.name().lower()
                      or 'mysql' in self.name().lower()
                      or 'mssql' in self.name().lower()):
                    col = f'CAST({col} AS TEXT)'
                modified_columns.append(col)

            where_clause = {op_json: [col, value]}
            if len(past_where_clause) > 0:
                where_clause = {'and': [where_clause, past_where_clause]}

            parsed_query['where'] = where_clause

        if limit is not None:
            parsed_query['limit'] = limit

        query = moz_sql_parser.format(parsed_query)
        query = query.replace('"', "'")
        query = query.replace("'.'", ".")
        for col in modified_columns:
            if f"'{col}'" in query:
                query = query.replace(f"'{col}'", col)

        df, col_map = self.query(query)
        df, col_map = unnest(df, col_map)

        if get_col_map:
            return df, col_map
        else:
            return df
    except Exception as e:
        print(traceback.format_exc())
        print('Failed to filter using SQL: ', e)
        return super().filter(where=where, limit=limit, get_col_map=get_col_map)
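The core move in `filter()` is grafting each extra predicate onto the already-parsed query with an `and` node before re-serializing. A minimal, self-contained sketch of that merge, using `moz_sql_parser` directly (the table, columns, and pattern are made-up illustration):

```python
# Sketch of the where-clause merge performed in filter() above.
import moz_sql_parser

parsed = moz_sql_parser.parse("SELECT a, b FROM t WHERE a > 1")

# New predicate in moz_sql_parser's JSON form; wrapping the value in
# {'literal': ...} makes format() emit it as a quoted string literal.
extra = {'like': ['b', {'literal': '%foo%'}]}

past = parsed.get('where', {})
parsed['where'] = {'and': [extra, past]} if past else extra

print(moz_sql_parser.format(parsed))
# roughly: SELECT a, b FROM t WHERE b LIKE '%foo%' AND a > 1
```

Note that `filter()` above passes the raw value instead of a `{'literal': ...}` node and patches the quoting afterwards with `query.replace('"', "'")`.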
def getPredIsNULL(sql, note='IS NULL'):
    # sql = sql.strip().replace('`', '').lower()
    # sql = sql.strip().lower()
    sql = sql.strip()
    if 'WHERE' in sql and 'HAVING' in sql:
        return ''
    if 'WHERE' not in sql and 'HAVING' not in sql and 'ON' not in sql:
        return sql

    subsql = sql
    subquery = False
    # parsing error now
    tokens = parser.parse(sql)
    for key, value in tokens.items():
        if key == 'from' and 'value' in value:
            subsql = parser.format(value['value'], ansi_quotes=False) + ';'
            subquery = True
            break

    if subquery:
        newsubsql = getPredIsNULL(subsql, note)
        # fix comparison and optimize
        if newsubsql.strip().lower() != subsql.strip().lower():
            newsubsql = ' (' + getPredIsNULL(subsql, note).replace(';', '').strip() + ') AS t'
        # print('newsub: ', newsubsql)
        import re
        # raw string for the pattern; re.X must be passed as flags, not as the
        # positional count argument
        newsql = re.sub(r'\s*\(SELECT .*\) AS [a-zA-Z_`]*', newsubsql, sql,
                        flags=re.X)
        # print('sql: ', sql)
        # print('new: ', newsql)
        newsql = newsql.replace('= TRUE', 'IS TRUE'.lower()).replace(
            '= FALSE', 'IS FALSE'.lower())
        return newsql
    else:
        subsql = sql

    # fix, drop limit, not effective
    select = subsql.replace(';', '').split('LIMIT')[0]
    if 'HAVING' in select:
        parsed = select.split('HAVING')
        pred = 'HAVING'
    elif 'WHERE' in select:
        parsed = select.split('WHERE')
        pred = 'WHERE'
    elif 'ON' in select:
        parsed = select.split('ON', 1)
        pred = 'ON'
    else:
        return subsql

    head = parsed[0].strip() + ';'
    if 'ORDER BY' in select:
        where = parsed[1].split('ORDER BY')[0][:-1].strip()
    else:
        where = parsed[1].strip()
    # if not where.strip().startswith('(') or not where.strip().endswith(')'):
    #     where = '({})'.format(where)
    concated = getAddWhereToSelect(head, '( {} ) {}'.format(where, note), pred)
    # print(concated)
    concated = concated.replace('= TRUE', 'IS TRUE'.lower()).replace(
        '= FALSE', 'IS FALSE'.lower())
    # print(concated)
    return concated
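`getPredIsNULL` hands the final re-assembly to `getAddWhereToSelect`, which is defined elsewhere and not shown here. Purely as an assumption, one plausible shape for it is sketched below; the real helper may differ.

```python
# Hypothetical sketch of the getAddWhereToSelect helper referenced above; its
# actual implementation is not part of this snippet and may differ.
def getAddWhereToSelect(head, predicate, keyword):
    # head is "SELECT ... FROM ...;" with the predicate already split off;
    # re-attach the wrapped predicate under the original keyword.
    return '{} {} {};'.format(head.rstrip(';').strip(), keyword, predicate)

# With this sketch, getPredIsNULL("SELECT a FROM t WHERE b > 1;") would yield
# something like "SELECT a FROM t WHERE ( b > 1 ) IS NULL;".
```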
def parse_as_create_ai_table(self) -> dict:
    CREATE, AI, TABLE, VIEW, FROM, USING, AS = map(
        CaselessKeyword, "CREATE AI TABLE VIEW FROM USING AS".split())
    AI_TABLE = AI + TABLE

    word = Word(alphanums + "_")

    expr = (CREATE + (AI_TABLE | VIEW) + word('ai_table_name') + AS
            + originalTextFor(nestedExpr('(', ')'))('select'))

    r = expr.parseString(self._sql)
    r = r.asDict()

    if r['select'].startswith('(') and r['select'].endswith(')'):
        r['select'] = r['select'][1:-1]
    r['select'] = r['select'].strip(' \n')

    select = moz_sql.parse(r['select'])

    if 'from' not in select \
            or len(select['from']) != 2 \
            or 'join' not in select['from'][1]:
        raise Exception(
            "'from' must be like: 'from integration.table join predictor'")

    # add 'name' to each statement
    for s in [*select['select'], select['from'][0], select['from'][1]['join']]:
        if 'name' not in s:
            if '.' in s['value']:
                s['name'] = s['value'][s['value'].find('.') + 1:]
            else:
                s['name'] = s['value']

    f = {
        'integration': select['from'][0],
        'predictor': select['from'][1]['join']
    }

    # remove predictor join
    select['from'].pop()

    new_select = []
    predictor_fields = []
    integration_prefix = f"{f['integration']['name']}."
    for s in select['select']:
        if s['value'].startswith(integration_prefix):
            s['value'] = s['value'][len(integration_prefix):]
            new_select.append(s)
        else:
            predictor_fields.append(s)

    predictor_prefix = f"{f['predictor']['name']}."
    for pf in predictor_fields:
        if pf['value'].startswith(predictor_prefix):
            pf['value'] = pf['value'][len(predictor_prefix):]

    integration_name = f['integration']['value'][:f['integration']['value'].find('.')]
    f['integration']['value'] = f['integration']['value'][len(integration_name) + 1:]

    select['select'] = new_select
    integration_sql = moz_sql.format(select)

    res = {
        'ai_table_name': r['ai_table_name'],
        'integration_name': integration_name,
        'integration_query': integration_sql,
        'query_fields': select['select'],
        'predictor_name': f['predictor']['value'],
        'predictor_fields': predictor_fields
    }

    return res
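For orientation, a hedged usage sketch of the parser above: the integration, table, and predictor names are all hypothetical, and the exact formatted query depends on moz_sql's output.

```python
# Illustrative input (hypothetical names); self._sql would hold this string.
sql = (
    "CREATE AI TABLE my_ai_table AS ("
    " SELECT t.a, t.b, p.predicted"
    " FROM my_integration.my_table AS t"
    " JOIN my_predictor AS p"
    " )"
)

# res = <parser instance>.parse_as_create_ai_table()
# Expected shape of the result (values approximate):
#   res['ai_table_name']     -> 'my_ai_table'
#   res['integration_name']  -> 'my_integration'
#   res['integration_query'] -> a SELECT over my_table AS t with only the
#                                integration-side columns (a, b)
#   res['predictor_name']    -> 'my_predictor'
#   res['predictor_fields']  -> [{'value': 'predicted', 'name': 'predicted'}]
```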