def execute(self): """Server-side query execution via plpy. Query can be run either cached or uncached, depending on C{plan_cache} setting given to L{__init__}. Returns result of plpy.execute(). """ args = self._arg_value_list types = self._arg_type_list if self._sqls is not None: self._sqls.append( { "sql": self.get_sql(PARAM_INLINE) } ) if self._plan_cache is not None: sql = self.get_sql(PARAM_PLPY) plan = self._plan_cache.get_plan(sql, types) res = plpy.execute(plan, args) else: sql = self.get_sql(PARAM_INLINE) res = plpy.execute(sql) if res: res = [skytools.dbdict(r) for r in res] return res
def hba_adj(cat, source, target, p, tablename='routing', col_geom='geom',
            col_edge='id', col_cost='cost', col_source='source',
            col_target='target', col_revc='reverse_cost', col_cat='category',
            col_name='name', col_rule='rule'):
    name = ''
    try:
        last_id = int(p[int(source)][1]['id'])
        name = p[int(source)][1]['name']
    except:
        last_id = -1
    if "source" in col_source:
        return plpy.execute(adj_plan, [source, last_id, target, name, cat[0]])
    else:
        return plpy.execute(adj_plan_rev, [source, last_id, target, name, cat[0]])
def moran_local(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I implementation for PL/Python
    Andy Eschbacher
    """
    # geometries with attributes that are null are ignored
    # resulting in a collection of not as near neighbors
    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", attr),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])

    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(5)
    except plpy.SPIError, e:
        plpy.error('Analysis failed: %s' % e)
        return pu.empty_zipped_array(5)
def get_nonspatial_kmeans(self, query):
    """fetch data for non-spatial kmeans"""
    try:
        data = plpy.execute(query)
        return data
    except plpy.SPIError, err:
        plpy.error('Analysis failed: %s' % err)
def _batch_insert(insert_sql, batch_size, column_types, rows):
    full_batch_plan = None

    batch = []
    for row in rows:
        batch.append(row)
        batch_len = len(batch)

        if batch_len >= batch_size:
            if full_batch_plan is None:
                full_batch_plan = _plan_batch(insert_sql, column_types, batch_len)
            plpy.execute(full_batch_plan, [item for sublist in batch for item in sublist])
            del batch[:]

    if batch:
        plan = _plan_batch(insert_sql, column_types, len(batch))
        plpy.execute(plan, [item for sublist in batch for item in sublist])
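# Hedged sketch of the _plan_batch() helper used above: it prepares an INSERT whose
# VALUES clause carries one $n placeholder per column for every row in the batch, so
# a single plpy.execute() call inserts batch_len rows at once. The exact SQL layout
# is an assumption; insert_sql is expected to end right before the VALUES list.
def _plan_batch(insert_sql, column_types, batch_len):
    num_cols = len(column_types)
    value_rows = []
    for r in range(batch_len):
        placeholders = ["$%d" % (r * num_cols + c + 1) for c in range(num_cols)]
        value_rows.append("(" + ", ".join(placeholders) + ")")
    sql = insert_sql + " values " + ", ".join(value_rows)
    # one type entry per placeholder, repeated for every batched row
    return plpy.prepare(sql, column_types * batch_len)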
def hba_astar(source, target, ol, cl, cl2, cat, d, p, tablename='routing',
              col_geom='geom', col_edge='id', col_cost='cost',
              col_revc='reverse_cost', col_source='source', col_target='target',
              vertex_tablename='vertex', col_cat='category',
              col_vertex_geom='geom', col_name='name', col_rule='rule'):
    # declare the module-level plans before they are read or assigned below
    global central_node_plan
    global distance_plan
    # If we don't have open candidates...
    if len(ol) == 0:
        return 0
    if len(ol) > 0:
        # x <- node with smallest f-value
        x = hba_bestNext(ol)
        # We move through the next best option:
        cl.append(x)
        del ol[x]
        # Have we just found the middle point?
        if (x == target or x in cl2):
            try:
                last_id = int(p[x][1]['id'])
            except:
                last_id = -1
            if "source" in col_source:
                check_x = plpy.execute(central_node_plan, [x, last_id])
            else:
                check_x = plpy.execute(central_node_plan, [last_id, x])
            for checking in check_x:
                return x
        # Next candidates
        # If we are in the initialization buffer, use hba_adj_initialization
        if distance_plan == -1:
            distance_plan = plpy.prepare(
                'SELECT min(st_distance_sphere(v1.geom, v2.geom)) as dist '
                'from vertex v1, vertex v2 '
                'where v1.id = $1 and (v2.id = $2 or v2.id = $3)',
                ['Integer', 'Integer', 'Integer'])
        distance = plpy.execute(distance_plan, [x, source, target], 1)[0]["dist"]
        adj = hba_adj(cat, x, target, p, tablename, col_geom, col_edge, col_cost,
                      col_source, col_target, col_revc, col_cat, col_name, col_rule)
        # Forever alone
        if adj is None:
            plpy.error("This vertex is alone")
        # For each candidate
        hba_process_y(adj, p, cat, d, ol, cl, x, target, vertex_tablename,
                      col_vertex_geom, col_edge, [], distance)
    # Return false, we still have to loop more
    return 0
def execute(self, operation, parameters=None): if self._is_closed(): raise Error() self.connection._ensure_transaction() parameters = parameters or [] placeholders = [] types = [] values = [] i = 0 for param in parameters: if param is None: # Directly put "None" as "NULL" in the sql # as it's not possible to get the type placeholders.append('NULL') else: i += 1 placeholders.append("$%d" % i) types.append(self.py_param_to_pg_type(param)) if types[-1] == 'bytea' and hasattr(param, 'tobytes'): values.append(param.tobytes()) else: values.append(param) query = operation % tuple(placeholders) try: plan = plpy.prepare(query, types) res = plpy.execute(plan, values) except plpy.SPIError as e: raise Error(e) self._execute_result = None self.rownumber = None self.description = None self.rowcount = -1 if res.status() in [self._SPI_OK_SELECT, self._SPI_OK_INSERT_RETURNING, self._SPI_OK_DELETE_RETURNING, self._SPI_OK_UPDATE_RETURNING]: if 'colnames' in res.__class__.__dict__: # Use colnames to get the order of the variables in the query self._execute_result = [tuple([row[col] for col in res.colnames()]) for row in res] else: self._execute_result = [tuple([row[col] for col in row]) for row in res] self.rownumber = 0 if 'colnames' in res.__class__.__dict__: # PG 9.2+: use .colnames() and .coltypes() methods self.description = [(name, get_type_obj(typeoid), None, None, None, None, None) for name, typeoid in zip(res.colnames(), res.coltypes())] elif len(res) > 0: # else get at least the column names from the row keys self.description = [(name, None, None, None, None, None, None) for name in res[0].keys()] else: # else we know nothing self.description = [(None, None, None, None, None, None, None)] if res.status() == self._SPI_OK_UTILITY: self.rowcount = -1 else: self.rowcount = res.nrows()
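# Minimal illustration of the placeholder rewriting performed by execute() above:
# DB-API style "%s" markers become PostgreSQL "$n" markers, and None is inlined as
# NULL because plpy.prepare() needs a concrete type for every parameter. The 'text'
# type below is a stand-in; the real method maps each Python value to a PG type.
def rewrite(operation, parameters):
    placeholders, types, values = [], [], []
    n = 0
    for param in parameters:
        if param is None:
            placeholders.append('NULL')
        else:
            n += 1
            placeholders.append('$%d' % n)
            types.append('text')
            values.append(param)
    return operation % tuple(placeholders), types, values

# rewrite("insert into t(a, b) values (%s, %s)", ["x", None])
# -> ("insert into t(a, b) values ($1, NULL)", ['text'], ['x'])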
def spatial_markov_trend(subquery, time_cols, num_classes=7,
                         w_type="knn", num_ngbrs=5, permutations=0,
                         geom_col="the_geom", id_col="cartodb_id"):
    """
    Predict the trends of a unit based on:
    1. history of its transitions to different classes (e.g., 1st quantile -> 2nd quantile)
    2. average class of its neighbors

    Inputs:
    @param subquery string: e.g., SELECT the_geom, cartodb_id, interesting_time_column FROM table_name
    @param time_cols list of strings: list of strings of column names
    @param num_classes (optional): number of classes to break distribution of values into. Currently uses quantile bins.
    @param w_type string (optional): weight type ('knn' or 'queen')
    @param num_ngbrs int (optional): number of neighbors (if knn type)
    @param permutations int (optional): number of permutations for test stats
    @param geom_col string (optional): name of column which contains the geometries
    @param id_col string (optional): name of column which has the ids of the table

    Outputs:
    @param trend_up float: probability that a geom will move to a higher class
    @param trend_down float: probability that a geom will move to a lower class
    @param trend float: (trend_up - trend_down) / trend_static
    @param volatility float: a measure of the volatility based on probability stddev(prob array)
    """
    if len(time_cols) < 2:
        plpy.error("More than one time column needs to be passed")

    qvals = {"id_col": id_col,
             "time_cols": time_cols,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    try:
        query_result = plpy.execute(pu.construct_neighbor_query(w_type, qvals))
        if len(query_result) == 0:
            return zip([None], [None], [None], [None], [None])
    except plpy.SPIError, e:
        plpy.debug("Query failed with exception %s: %s" %
                   (e, pu.construct_neighbor_query(w_type, qvals)))
        plpy.error("Analysis failed: %s" % e)
        return zip([None], [None], [None], [None], [None])
def isconnected(transfos, doubletransfo=False):
    """
    Check if transfos list corresponds to a connected graph
    """
    success = True
    edges = {}
    # check connectivity
    # getting sources and targets for each transformation
    tlist = ['{},{}'.format(i, r) for i, r in enumerate(transfos)]
    vals = '({})'.format('),('.join(tlist))
    rv = plpy.execute(
        """
        select id, source, target
        from (values {}) as v
        join li3ds.transfo t on v.column2 = t.id
        order by v.column1
        """.format(vals)
    )
    transfoset = set([(r['source'], r['target']) for r in rv])
    if not doubletransfo and len(transfoset) != len(rv):
        # multiple edges between source and target
        return False
    # fill the edges for later use
    for tra in rv:
        edges[tra['id']] = (tra['source'], tra['target'])

    # check connexity
    neighbors = defaultdict(set)
    # store referentials (nodes)
    nodes = set(chain.from_iterable(edges.values()))
    for tra, refs in edges.items():
        neighbors[refs[0]].update({refs[1]})
        neighbors[refs[1]].update({refs[0]})

    visited_nodes = {}
    start_node = list(nodes)[0]
    queue = deque()
    queue.append(start_node)
    visited_nodes[start_node] = True

    while queue:
        node = queue.popleft()
        for child in neighbors[node]:
            if child not in visited_nodes:
                visited_nodes[child] = True
                queue.append(child)

    diff = len(visited_nodes) - len(nodes)
    if diff:
        success = False
        plpy.warning(
            'disconnected graph, visited nodes {}, total {}'
            .format(len(visited_nodes), len(nodes))
        )
    return success
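# Standalone illustration of the connectivity test used by isconnected() above:
# given the {transfo_id: (source, target)} edge dict, build undirected adjacency
# sets and BFS from an arbitrary node; the graph is connected iff every referential
# is reached. The ids below are made up for the example.
from collections import defaultdict, deque
from itertools import chain

edges = {1: (10, 11), 2: (11, 12), 3: (12, 10)}
neighbors = defaultdict(set)
nodes = set(chain.from_iterable(edges.values()))
for src, tgt in edges.values():
    neighbors[src].add(tgt)
    neighbors[tgt].add(src)

visited = {next(iter(nodes))}
queue = deque(visited)
while queue:
    node = queue.popleft()
    for child in neighbors[node]:
        if child not in visited:
            visited.add(child)
            queue.append(child)

print(len(visited) == len(nodes))  # True: the three referentials form a cycle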
def mount_csv(in_file_name, out_table_name, sep=";", header=True,
              column_types=None, **kwargs):
    import plpy
    if sep != None:
        with open(in_file_name, "r") as fp:
            columns = fp.readline().replace("\n", "").replace("\r", "").split(sep)
    else:
        columns = ["value"]
    if header == False:
        columns = ["COL%d" % d for d in range(len(columns))]
    elif type(header) == list:
        columns = header
        header = False
    columns = map(lambda x: x.lower(), columns)
    if not column_types:
        column_types = ["text"] * len(columns)
    try:
        plpy.execute("""CREATE EXTENSION IF NOT EXISTS file_fdw;CREATE SERVER csv_server FOREIGN DATA WRAPPER file_fdw;""")
    except:
        pass
    if out_table_name.find(".") > 0:
        plpy.execute("CREATE SCHEMA IF NOT EXISTS %s;" % out_table_name.split(".")[0])
    plpy.execute("DROP FOREIGN TABLE IF EXISTS %s;" % out_table_name)
    cmd = """CREATE FOREIGN TABLE %s (%s) SERVER csv_server OPTIONS (filename '%s', format 'csv', header '%s' %s);""" % (
        out_table_name,
        ",".join(["%s %s" % (columns[c], column_types[c]) for c in range(len(columns))]),
        in_file_name,
        "true" if header else "false",
        ", delimiter '%s'" % sep if sep != None else "")
    ret = plpy.execute(cmd)
    plpy.info(cmd)
    return ret
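# Hedged usage sketch for mount_csv() above, run from inside a PL/Python function.
# The file path, table name and column types are illustrative; with header=True the
# column names are read from the file's first line, so column_types must match the
# number of columns found there.
mount_csv('/tmp/pois.csv', 'staging.pois', sep=';', header=True,
          column_types=['integer', 'text', 'numeric'])
# the foreign table can then be queried like any other table:
# plpy.execute("SELECT count(*) AS n FROM staging.pois")[0]['n']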
def hba_heuristic(source, target, tablename='vertex', col_geom='geom', col_id='id'):
    # declare the module-level plan before it is read or assigned below
    global heuristic_plan
    if heuristic_plan == -1:
        # identifiers are interpolated into the SQL text; only $2 and $3 are used
        heuristic_plan = plpy.prepare(
            'select st_distance(a.' + col_geom + ', b.' + col_geom + ') as cost from '
            + tablename + ' as a, ' + tablename + ' as b where a.'
            + col_id + ' = $2 and b.' + col_id + ' = $3',
            ['text', 'integer', 'integer', 'text', 'text'])
    try:
        return plpy.execute(heuristic_plan,
                            [col_geom, source, target, tablename, col_id], 1)[0]['cost']
    except:
        plpy.info("No heuristic distance. This is a bug, probably.")
        return float('inf')
def run_presto_as_temp_table(server, user, catalog, schema, result_table, query): try: search_path = _get_session_search_path_array() if search_path != ['$user', 'public'] and len(search_path) > 0: # search_path is changed explicitly. Use the first schema schema = search_path[0] client = presto_client.Client(server=server, user=user, catalog=catalog, schema=schema, time_zone=_get_session_time_zone()) create_sql = "create temporary table %s (\n " % plpy.quote_ident(result_table) insert_sql = "insert into %s (\n " % plpy.quote_ident(result_table) values_types = [] q = client.query(query) try: # result schema column_names = [] column_types = [] for column in q.columns(): column_names.append(column.name) column_types.append(_pg_result_type(column.type)) # build SQL column_names = _rename_duplicated_column_names(column_names) create_sql = _build_create_temp_table_sql(result_table, column_names, column_types) insert_sql = _build_insert_into_sql(result_table, column_names) # run CREATE TABLE plpy.execute("drop table if exists " + plpy.quote_ident(result_table)) plpy.execute(create_sql) # run INSERT _batch_insert(insert_sql, 10, column_types, q.results()) finally: q.close() except (plpy.SPIError, presto_client.PrestoException) as e: # PL/Python converts an exception object in Python to an error message in PostgreSQL # using exception class name if exc.__module__ is either of "builtins", "exceptions", # or "__main__". Otherwise using "module.name" format. Set __module__ = "__module__" # to generate pretty messages. e.__class__.__module__ = "__main__" raise
def execute(self, arg_dict, all_keys_required=True):
    try:
        if all_keys_required:
            arg_list = [arg_dict[k] for k in self.arg_map]
        else:
            arg_list = [arg_dict.get(k) for k in self.arg_map]
        return plpy.execute(self.plan, arg_list)
    except KeyError:
        plpy.error("Missing argument: QUERY: %s ARGS: %s VALUES: %s" % (
            repr(self.sql), repr(self.arg_map), repr(arg_dict)))
def _load_oid_to_type_name_mapping(oids):
    oids = filter(lambda oid: oid not in OidToTypeNameMapping, oids)
    if oids:
        sql = ("select oid, typname"
               " from pg_catalog.pg_type"
               " where oid in (%s)") % (", ".join(map(str, oids)))
        for row in plpy.execute(sql):
            OidToTypeNameMapping[int(row["oid"])] = row["typname"]
    return OidToTypeNameMapping
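# Illustrative use of the OID cache above (names follow the snippet): look up type
# names for a result set's column OIDs, hitting pg_catalog.pg_type only for OIDs not
# seen before in this session. Result objects expose coltypes() on PostgreSQL 9.2+.
res = plpy.execute("select 1 as a, 'x'::text as b")
if hasattr(res, 'coltypes'):
    mapping = _load_oid_to_type_name_mapping(res.coltypes())
    type_names = [mapping.get(oid) for oid in res.coltypes()]  # e.g. ['int4', 'text']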
def create_and_predict_segment(query, variable, target_query, model_params):
    """
    generate a segment with machine learning
    Stuart Lynn
    """
    ## fetch column names
    try:
        columns = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '
                               .format(query=query))[0].keys()
    except Exception, e:
        plpy.error('Failed to build segmentation model: %s' % e)
def get_type_obj(typeoid):
    """Return the type object (STRING, NUMBER, etc.) that corresponds to the
    given type OID."""
    if not _typoid_typeobjs:
        for row in plpy.execute(plpy.prepare("SELECT oid, typname, typcategory FROM pg_type")):
            if row['typcategory'] in _typcategory_typeobjs:
                _typoid_typeobjs[int(row['oid'])] = _typcategory_typeobjs[row['typcategory']]
            elif row['typname'] in _typname_typeobjs:
                _typoid_typeobjs[int(row['oid'])] = _typname_typeobjs[row['typname']]
    return _typoid_typeobjs.get(typeoid)
def get_getis(self, w_type, params):
    """fetch data for getis ord's g"""
    try:
        query = pu.construct_neighbor_query(w_type, params)
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(4)
        else:
            return result
    except plpy.SPIError, err:
        plpy.error('Analysis failed: %s' % err)
def get_markov(self, w_type, params):
    """fetch data for spatial markov"""
    try:
        query = pu.construct_neighbor_query(w_type, params)
        data = plpy.execute(query)
        if len(data) == 0:
            return pu.empty_zipped_array(4)
        return data
    except plpy.SPIError, err:
        plpy.error('Analysis failed: %s' % err)
def start_presto_query(presto_server, presto_user, presto_catalog, presto_schema, function_name, query): try: # preserve search_path if explicitly set search_path = _get_session_search_path_array() if search_path != ['$user', 'public'] and len(search_path) > 0: # search_path is changed explicitly. use the first schema presto_schema = search_path[0] # start query client = presto_client.Client(server=presto_server, user=presto_user, catalog=presto_catalog, schema=presto_schema, time_zone=_get_session_time_zone()) query = client.query(query) session.query_auto_close = QueryAutoClose(query) try: # result schema column_names = [] column_types = [] for column in query.columns(): column_names.append(column.name) column_types.append(_pg_result_type(column.type)) column_names = _rename_duplicated_column_names(column_names, "a query result") session.query_auto_close.column_names = column_names session.query_auto_close.column_types = column_types # CREATE TABLE for return type of the function type_name = function_name + "_type" create_type_sql = _build_create_temp_table_sql(type_name, column_names, column_types) # CREATE FUNCTION create_function_sql = \ """ create or replace function pg_temp.%s() returns setof pg_temp.%s as $$ import prestogres return prestogres.fetch_presto_query_results() $$ language plpythonu """ % \ (plpy.quote_ident(function_name), plpy.quote_ident(type_name)) # run statements plpy.execute("drop table if exists pg_temp.%s cascade" % \ (plpy.quote_ident(type_name))) plpy.execute(create_type_sql) plpy.execute(create_function_sql) query = None finally: if query is not None: # close query session.query_auto_close = None except (plpy.SPIError, presto_client.PrestoException) as e: # PL/Python converts an exception object in Python to an error message in PostgreSQL # using exception class name if exc.__module__ is either of "builtins", "exceptions", # or "__main__". Otherwise using "module.name" format. Set __module__ = "__module__" # to generate pretty messages. e.__class__.__module__ = "__main__" raise
def country_to_iso3(country):
    """ Convert country to its iso3 code """
    try:
        country_plan = plpy.prepare(
            "SELECT adm0_a3 as iso3 FROM admin0_synonyms WHERE lower(regexp_replace($1, "
            "'[^a-zA-Z\u00C0-\u00ff]+', '', 'g'))::text = name_; ", ['text'])
        country_result = plpy.execute(country_plan, [country], 1)
        if country_result:
            return country_result[0]['iso3']
        else:
            return None
    except BaseException as e:
        plpy.warning("Can't get the iso3 code from {0}: {1}".format(country, e))
        return None
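# Usage sketch for country_to_iso3() above: the lookup goes through a prepared
# statement parameterized on the country name, so no manual quoting is needed, and
# a miss simply yields None. The admin0_synonyms table comes from the snippet itself.
iso3 = country_to_iso3('France')   # expected to return 'FRA' when the synonym exists
if iso3 is None:
    plpy.warning('unknown country name')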
def execute(self, arg_dict, all_keys_required=True):
    try:
        if all_keys_required:
            arg_list = [arg_dict[k] for k in self.arg_map]
        else:
            arg_list = [arg_dict.get(k) for k in self.arg_map]
        return plpy.execute(self.plan, arg_list)
    except KeyError:
        need = set(self.arg_map)
        got = set(arg_dict.keys())
        missing = list(need.difference(got))
        plpy.error("Missing arguments: [%s] QUERY: %s" % (
            ','.join(missing), repr(self.sql)))
def get_spatial_kmeans(self, params):
    """fetch data for spatial kmeans"""
    query = ("SELECT "
             "array_agg({id_col} ORDER BY {id_col}) as ids,"
             "array_agg(ST_X({geom_col}) ORDER BY {id_col}) As xs,"
             "array_agg(ST_Y({geom_col}) ORDER BY {id_col}) As ys "
             "FROM ({subquery}) As a "
             "WHERE {geom_col} IS NOT NULL").format(**params)
    try:
        data = plpy.execute(query)
        return data
    except plpy.SPIError, err:
        plpy.error('Analysis failed: %s' % err)
def get_moran(self, w_type, params):
    """fetch data for moran's i analyses"""
    try:
        query = pu.construct_neighbor_query(w_type, params)
        data = plpy.execute(query)
        # if there are no neighbors, exit
        if len(data) == 0:
            return pu.empty_zipped_array(2)
        return data
    except plpy.SPIError, err:
        plpy.error('Analysis failed: %s' % err)
        return pu.empty_zipped_array(2)
def __init__(self, context, global_dict=None):
    """ This object must be instantiated at the beginning of each db service """
    DBService.__init__(self, context, global_dict)
    rec = skytools.db_urldecode(context)
    if "username" not in rec:
        plpy.error("Username must be provided in db service context parameter")
    self.username = rec['username']  # used for logging purposes
    res = plpy.execute("select txid_current() as txid;")
    row = res[0]
    self.version = row["txid"]
    self.rows_found = 0  # flag set by run query to indicate number of rows got
def kmeans(query, no_clusters, no_init=20):
    data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
                                  array_agg(ST_X(the_geom) order by cartodb_id) xs,
                                  array_agg(ST_Y(the_geom) order by cartodb_id) ys
                           from ({query}) a
                           where the_geom is not null
                        '''.format(query=query))
    xs = data[0]['xs']
    ys = data[0]['ys']
    ids = data[0]['ids']

    km = KMeans(n_clusters=no_clusters, n_init=no_init)
    labels = km.fit_predict(zip(xs, ys))
    return zip(ids, labels)
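# Hedged usage sketch for kmeans() above (it needs scikit-learn's KMeans available in
# the PL/Python environment): cluster the non-null geometries of a query into five
# groups. The query text is illustrative; each result pairs a cartodb_id with its
# cluster label.
pairs = kmeans('select cartodb_id, the_geom from my_table', no_clusters=5)
for cartodb_id, label in pairs:
    plpy.notice('%s -> cluster %s' % (cartodb_id, label))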
def coordinates_to_polygon(coordinates):
    """Convert Mapzen coordinates to a PostGIS polygon"""
    result_coordinates = []
    for coordinate in coordinates:
        result_coordinates.append("%s %s" % (coordinate[0], coordinate[1]))
    wkt_coordinates = ','.join(result_coordinates)

    try:
        sql = "SELECT ST_MakePolygon(ST_GeomFromText('LINESTRING({0})', 4326)) as geom".format(wkt_coordinates)
        geometry = plpy.execute(sql, 1)[0]['geom']
    except BaseException as e:
        plpy.warning("Can't generate POLYGON from coordinates: {0}".format(e))
        geometry = None
    return geometry
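# Usage sketch for coordinates_to_polygon() above: coordinates are (lon, lat) pairs
# as in GeoJSON, and the ring has to be closed (first point repeated at the end) for
# ST_MakePolygon to accept the LINESTRING. The square below is illustrative.
ring = [[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]
geom = coordinates_to_polygon(ring)   # hex-encoded geometry string, or None on failure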
def get_dyn_transfo_params_form_1(params_column, params, time): ''' Return the dynamic transfo parameters. ''' if isinstance(time, datetime.datetime): plpy.error('times as strings unsupported for dynamic transforms of form 1') schema, table, column = tuple(map(plpy.quote_ident, params_column.split('.'))) params = params[0] select = [] for param in params.values(): if isinstance(param, list): for dim in param: append_dim_select(dim, select) else: dim = param append_dim_select(dim, select) select = ', '.join(select) q = (''' with patch as ( select pc_interpolate({column}, 'time', {time:f}, true) point from {schema}.{table} where pc_patchmin({column}, 'time') <= {time:f} and pc_patchmax({column}, 'time') > {time:f} ) select %s from patch ''' % select).format(schema=schema, table=table, column=column, time=time) plpy.debug(q) rv = plpy.execute(q) if len(rv) == 0: plpy.warning('no parameters for the provided time ({:f})'.format(time)) return None if len(rv) != 1: plpy.error('multiple rows returned from time interpolation') values = rv[0] for key, param in params.items(): if isinstance(param, list): for i, dim in enumerate(param): val = values[dim] param[i] = val else: dim = param val = values[dim] params[key] = val return params
def execute(self, operation, parameters=None): if self._is_closed(): raise Error() self.connection._ensure_transaction() parameters = parameters or [] placeholders = [] types = [] values = [] for i, param in enumerate(parameters): placeholders.append("$%d" % (i + 1)) types.append(self.py_param_to_pg_type(param)) values.append(param) if len(placeholders) == 1: query = operation % placeholders[0] else: query = operation % placeholders try: plan = plpy.prepare(query, types) res = plpy.execute(plan, values) except plpy.SPIError as e: raise Error(e) self._execute_result = None self.rownumber = None self.description = None self.rowcount = -1 if res.status() == self._SPI_OK_SELECT: self._execute_result = [[row[col] for col in row] for row in res] self.rownumber = 0 if 'colnames' in res.__class__.__dict__: # PG 9.2+: use .colnames() and .coltypes() methods self.description = [(name, get_type_obj(typeoid), None, None, None, None, None) for name, typeoid in zip(res.colnames(), res.coltypes())] elif len(res) > 0: # else get at least the column names from the row keys self.description = [(name, None, None, None, None, None, None) for name in res[0].keys()] else: # else we know nothing self.description = [(None, None, None, None, None, None, None)] if res.status() == self._SPI_OK_UTILITY: self.rowcount = -1 else: self.rowcount = res.nrows()
def moran_local_rate(t, numerator, denominator, significance, num_ngbrs,
                     permutations, geom_column, id_col, w_type):
    """
    Moran's I Local Rate
    Andy Eschbacher
    """
    plpy.notice('** Constructing query')

    # geometries with attributes that are null are ignored
    # resulting in a collection of not as near neighbors
    qvals = {"id_col": id_col,
             "numerator": numerator,
             "denominator": denominator,
             "geom_col": geom_column,
             "table": t,
             "num_ngbrs": num_ngbrs}

    q = get_query(w_type, qvals)

    try:
        r = plpy.execute(q)
        plpy.notice('** Query returned with %d rows' % len(r))
    except plpy.SPIError as err:
        plpy.notice('** Query failed: "%s"' % q)
        plpy.notice('** Error: %s' % err)
        plpy.notice('** Exiting function')
        return zip([None], [None], [None], [None])

    plpy.notice('r.nrows() = %d' % r.nrows())

    ## collect attributes
    numer = get_attributes(r, 1)
    denom = get_attributes(r, 2)

    w = get_weight(r, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, w,
                                          permutations=permutations)

    # find units of significance
    lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)

    plpy.notice('** Finished calculations')

    ## TODO: Decide on which return values here
    return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order, lisa.y)
def geo_polyline_to_multipolygon(polyline):
    """Convert a HERE polyline shape to a PostGIS multipolygon"""
    # In case we receive an empty polyline from here and we don't want to
    # change this kind of thing in the extension sql
    if not polyline:
        sql = "SELECT ST_MPolyFromText(NULL, 4326) as geom"
    else:
        coordinates = []
        for point in polyline:
            lat, lon = point.split(',')
            coordinates.append("%s %s" % (lon, lat))
        wkt_coordinates = ','.join(coordinates)
        sql = "SELECT ST_MPolyFromText('MULTIPOLYGON((({0})))', 4326) as geom".format(wkt_coordinates)
    geometry = plpy.execute(sql, 1)[0]['geom']
    return geometry
def get_gwr_predict(self, params):  # pylint: disable=no-self-use
    """fetch data for gwr predict"""
    query = pu.gwr_predict_query(params)
    return plpy.execute(query)
def get_gwr(self, params):  # pylint: disable=no-self-use
    """fetch data for gwr analysis"""
    query = pu.gwr_query(params)
    return plpy.execute(query)
# make sure we are run in a PL/Python block
try:
    import plpy
except ImportError:
    raise RuntimeError("%s can only be used in a PL/Python block" % __name__)

try:
    # import the module functions
    from _pyhstore import parse_hstore, serialize_hstore
except ImportError, e:
    # this might mean hstore is not yet loaded in this session, so try to
    # force loading it
    try:
        plpy.execute("select '1=>1'::hstore")
    except:
        # did not work for whatever reason, complain with the original exception
        raise e
    # try loading the functions again
    from _pyhstore import parse_hstore, serialize_hstore
def _get_txid(self):
    result = plpy.execute('select txid_current() as txid')
    return result[0]['txid']
def get_markov(self, w_type, params):
    """fetch data for spatial markov"""
    query = pu.construct_neighbor_query(w_type, params)
    return plpy.execute(query)
def get_gwr(self, params):
    """fetch data for gwr analysis"""
    query = pu.gwr_query(params)
    return plpy.execute(query)
def get_moran(self, w_type, params):  # pylint: disable=no-self-use
    """fetch data for moran's i analyses"""
    query = pu.construct_neighbor_query(w_type, params)
    return plpy.execute(query)
def applyrow(tblname, ev_type, new_row,
             backup_row=None,
             alt_pkey_cols=None,
             fkey_cols=None,
             fkey_ref_table=None,
             fkey_ref_cols=None,
             fn_canapply=canapply_dummy,
             fn_colfilter=colfilter_full):
    """Core logic.  Actual decisions will be done in callback functions.

    - [IUD]: If row referenced by fkey does not exist, event is not applied
    - If pkey does not exist but alt_pkey does, row is not applied.

    @param tblname: table name, schema-qualified
    @param ev_type: [IUD]:pkey1,pkey2
    @param alt_pkey_cols: list of alternative columns to consider
    @param fkey_cols: columns in this table that refer to other table
    @param fkey_ref_table: other table referenced here
    @param fkey_ref_cols: column in other table that must match
    @param fn_canapply: callback function, gets new and old row, returns whether the row should be applied
    @param fn_colfilter: callback function, gets new and old row, returns dict of final columns to be applied
    """

    gd = None

    # parse ev_type
    tmp = ev_type.split(':', 1)
    if len(tmp) != 2 or tmp[0] not in ('I', 'U', 'D'):
        raise DataError('Unsupported ev_type: ' + repr(ev_type))
    if not tmp[1]:
        raise DataError('No pkey in event')

    cmd = tmp[0]
    pkey_cols = tmp[1].split(',')
    qtblname = skytools.quote_fqident(tblname)

    # parse ev_data
    fields = skytools.db_urldecode(new_row)

    if ev_type.find('}') >= 0:
        raise DataError('Really suspicious activity')
    if ",".join(fields.keys()).find('}') >= 0:
        raise DataError('Really suspicious activity 2')

    # generate pkey expressions
    tmp = ["%s = {%s}" % (skytools.quote_ident(k), k) for k in pkey_cols]
    pkey_expr = " and ".join(tmp)
    alt_pkey_expr = None
    if alt_pkey_cols:
        tmp = ["%s = {%s}" % (skytools.quote_ident(k), k) for k in alt_pkey_cols]
        alt_pkey_expr = " and ".join(tmp)

    log = "data ok"

    #
    # Row data seems fine, now apply it
    #

    if fkey_ref_table:
        tmp = []
        for k, rk in zip(fkey_cols, fkey_ref_cols):
            tmp.append("%s = {%s}" % (skytools.quote_ident(rk), k))
        fkey_expr = " and ".join(tmp)
        q = "select 1 from only %s where %s" % (
            skytools.quote_fqident(fkey_ref_table), fkey_expr)
        res = skytools.plpy_exec(gd, q, fields)
        if not res:
            return "IGN: parent row does not exist"
        log += ", fkey ok"

    # fetch old row
    if alt_pkey_expr:
        q = "select * from only %s where %s for update" % (qtblname, alt_pkey_expr)
        res = skytools.plpy_exec(gd, q, fields)
        if res:
            oldrow = res[0]
            # if altpk matches, but pk not, then delete
            need_del = 0
            for k in pkey_cols:
                # fixme: proper type cmp?
                if fields[k] != str(oldrow[k]):
                    need_del = 1
                    break
            if need_del:
                log += ", altpk del"
                q = "delete from only %s where %s" % (qtblname, alt_pkey_expr)
                skytools.plpy_exec(gd, q, fields)
                res = None
            else:
                log += ", altpk ok"
    else:
        # no altpk
        q = "select * from only %s where %s for update" % (qtblname, pkey_expr)
        res = skytools.plpy_exec(None, q, fields)

    # got old row, with same pk and altpk
    if res:
        oldrow = res[0]
        log += ", old row"
        ok = fn_canapply(fields, oldrow)
        if ok:
            log += ", new row better"
        if not ok:
            # ignore the update
            return "IGN:" + log + ", current row more up-to-date"
    else:
        log += ", no old row"
        oldrow = None

    if res:
        if cmd == 'I':
            cmd = 'U'
    else:
        if cmd == 'U':
            cmd = 'I'

    # allow column changes
    if oldrow:
        fields2 = fn_colfilter(fields, oldrow)
        for k in pkey_cols:
            if k not in fields2:
                fields2[k] = fields[k]
        fields = fields2

    # apply change
    if cmd == 'I':
        q = skytools.mk_insert_sql(fields, tblname, pkey_cols)
    elif cmd == 'U':
        q = skytools.mk_update_sql(fields, tblname, pkey_cols)
    elif cmd == 'D':
        q = skytools.mk_delete_sql(fields, tblname, pkey_cols)
    else:
        plpy.error('Huh')

    plpy.execute(q)

    return log
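# Hedged usage sketch for applyrow() above: new_row is urlencoded column data (the
# format skytools.db_urldecode() expects) and ev_type carries the operation letter
# plus the primary-key column list. Table name and values are illustrative.
log = applyrow(
    'public.customers',              # schema-qualified table name
    'U:id',                          # update event, primary key column "id"
    'id=42&name=Alice&balance=10.5',
)
plpy.notice(log)                     # e.g. "data ok, old row, new row better"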
def setup_system_catalog(presto_server, presto_user, presto_catalog, presto_schema, access_role): search_path = _get_session_search_path_array() if search_path == ['$user', 'public']: # search_path is default value. plpy.execute("set search_path to %s" % plpy.quote_ident(presto_schema)) client = presto_client.Client(server=presto_server, user=presto_user, catalog=presto_catalog, schema='default') # get table list sql = "select table_schema, table_name, column_name, is_nullable, data_type" \ " from information_schema.columns" columns, rows = client.run(sql) if rows is None: rows = [] schemas = {} for row in rows: schema_name = row[0] table_name = row[1] column_name = row[2] is_nullable = row[3] column_type = row[4] if schema_name == "sys" or schema_name == "information_schema": # skip system schemas continue if len(schema_name) > PG_NAMEDATALEN - 1: plpy.warning("Schema %s is skipped because its name is longer than %d characters" % \ (plpy.quote_ident(schema_name), PG_NAMEDATALEN - 1)) continue tables = schemas.setdefault(schema_name, {}) if len(table_name) > PG_NAMEDATALEN - 1: plpy.warning("Table %s.%s is skipped because its name is longer than %d characters" % \ (plpy.quote_ident(schema_name), plpy.quote_ident(table_name), PG_NAMEDATALEN - 1)) continue columns = tables.setdefault(table_name, []) if len(column_name) > PG_NAMEDATALEN - 1: plpy.warning("Column %s.%s.%s is skipped because its name is longer than %d characters" % \ (plpy.quote_ident(schema_name), plpy.quote_ident(table_name), \ plpy.quote_ident(column_name), PG_NAMEDATALEN - 1)) continue columns.append(Column(column_name, column_type, is_nullable)) # drop all schemas excepting prestogres_catalog, information_schema and pg_% sql = "select n.nspname as schema_name from pg_catalog.pg_namespace n" \ " where n.nspname not in ('prestogres_catalog', 'information_schema')" \ " and n.nspname not like 'pg_%'" for row in plpy.cursor(sql): plpy.execute("drop schema %s cascade" % plpy.quote_ident(row["schema_name"])) # create schema and tables for schema_name, tables in sorted(schemas.items(), key=lambda (k, v): k): try: plpy.execute("create schema %s" % (plpy.quote_ident(schema_name))) except: # ignore error? pass for table_name, columns in sorted(tables.items(), key=lambda (k, v): k): column_names = [] column_types = [] not_nulls = [] for column in columns: column_names.append(column.name) column_types.append(_pg_table_type(column.type)) not_nulls.append(not column.nullable) # change columns column_names = _rename_duplicated_column_names( column_names, "%s.%s table" % (plpy.quote_ident(schema_name), plpy.quote_ident(table_name))) create_sql = _build_create_table(schema_name, table_name, column_names, column_types, not_nulls) plpy.execute(create_sql) # grant access on the schema to the restricted user so that # pg_table_is_visible(reloid) used by \d of psql command returns true plpy.execute("grant usage on schema %s to %s" % \ (plpy.quote_ident(schema_name), plpy.quote_ident(access_role))) # this SELECT privilege is unnecessary because queries against those tables # won't run on PostgreSQL. causing an exception is good if Prestogres has # a bug sending a presto query to PostgreSQL without rewriting. # TODO however, it's granted for now because some BI tools might check # has_table_privilege. the best solution is to grant privilege but # actually selecting from those tables causes an exception. 
plpy.execute("grant select on all tables in schema %s to %s" % \ (plpy.quote_ident(schema_name), plpy.quote_ident(access_role))) # fake current_database() to return Presto's catalog name to be compatible with some # applications that use db.schema.table syntax to identify a table if plpy.execute("select pg_catalog.current_database()")[0].values( )[0] != presto_catalog: plpy.execute( "delete from pg_catalog.pg_proc where proname='current_database'") plpy.execute("create function pg_catalog.current_database() returns name as $$begin return %s::name; end$$ language plpgsql stable strict" % \ plpy.quote_literal(presto_catalog))
def _get_session_search_path_array():
    rows = plpy.execute("select ('{' || current_setting('search_path') || '}')::text[]")
    return rows[0].values()[0]
def _get_session_time_zone():
    rows = plpy.execute("show timezone")
    return rows[0].values()[0]
def get_getis(self, w_type, params):  # pylint: disable=no-self-use
    """fetch data for getis ord's g"""
    query = pu.construct_neighbor_query(w_type, params)
    return plpy.execute(query)
def dijkstra(config, source, target, stoptosensor=''): ''' returns the transfo list needed to go from source referential to target referential ''' # get all transformations involved in the transfo tree list transfo_list = plpy.execute( """ select array_aggmult(tt.transfos) as trf from li3ds.platform_config pf join li3ds.transfo_tree tt on tt.id = ANY(pf.transfo_trees) where pf.id = {} """.format(config) )[0]['trf'] transfo_list_coma_separated = ','.join(map(str, transfo_list)) # list of adjacent nodes (referentials) # adj_list = [ref1: [ref7, ref1], ref2: [ref3]...] result = plpy.execute( """ select r.id , array_agg(t.target) filter (where t.target is not NULL) as adj_list -- we keep a NULL column instead of an array -- with a null value inside from li3ds.referential r left join li3ds.transfo t -- we only keep direct transformations on t.source = r.id and array[t.id] <@ array[{}]::integer[] group by r.id """.format(transfo_list_coma_separated) ) # build graph # graph = {ref1: [(1, ref7), (1, ref3)...], ...} graph = {} for column in result: if column['adj_list'] is not None: graph[column['id']] = [(1, idt) for idt in column['adj_list']] else: graph[column['id']] = [] if source not in graph: raise Exception("No referential with id {}".format(source)) if target not in graph: raise Exception("No referential with id {}".format(target)) M = set() d = {source: 0} p = {} next_nodes = [(0, source)] while next_nodes: dx, x = heappop(next_nodes) if x in M: continue M.add(x) for w, y in graph[x]: if y in M: continue dy = dx + w if y not in d or d[y] > dy: d[y] = dy heappush(next_nodes, (dy, y)) p[y] = x shortest_path = [target] x = target while x != source: try: x = p[x] except KeyError: plpy.notice("No path from ref:{} to ref:{} with config {}" .format(source, target, config)) return [] shortest_path.insert(0, x) if stoptosensor: # if a sensor type was requested we want to return # the first referential matching that type for ref in shortest_path: found = plpy.execute(""" select r.id, s.type from referential r join sensor s on r.sensor = s.id where r.id = {}""".format(ref)) if found[0]['type'] == stoptosensor: return [found[0]['id']] raise Exception( "No referential in path with type {}".format(stoptosensor)) # we have referentials now we need all transformations # assembling refs by pair ref_pair = [ shortest_path[i:i + 2] for i in range(0, len(shortest_path) - 1)] transfos = [] for ref_source, ref_target in ref_pair: transfos.append(plpy.execute( """ select id from li3ds.transfo where source = {} and target = {} and array[id] <@ array[{}]::integer[] """.format(ref_source, ref_target, transfo_list_coma_separated))[0]['id']) return transfos
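# Standalone illustration of the Dijkstra core used above: graph maps each
# referential id to a list of (cost, neighbour) tuples (every edge costs 1 here),
# and the predecessor map p is what the function later unwinds into the path.
# The ids are made up for the example.
from heapq import heappush, heappop

graph = {1: [(1, 2)], 2: [(1, 1), (1, 3)], 3: [(1, 2), (1, 4)], 4: [(1, 3)]}
source, target = 1, 4

M, d, p = set(), {source: 0}, {}
next_nodes = [(0, source)]
while next_nodes:
    dx, x = heappop(next_nodes)
    if x in M:
        continue
    M.add(x)
    for w, y in graph[x]:
        if y not in M and (y not in d or d[y] > dx + w):
            d[y] = dx + w
            heappush(next_nodes, (d[y], y))
            p[y] = x

path = [target]
while path[0] != source:
    path.insert(0, p[path[0]])
print(path)   # [1, 2, 3, 4]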
def get_markov(self, w_type, params):  # pylint: disable=no-self-use
    """fetch data for spatial markov"""
    query = pu.construct_neighbor_query(w_type, params)
    return plpy.execute(query)
def setup_system_catalog(presto_server, presto_user, presto_catalog, access_role): client = presto_client.Client(server=presto_server, user=presto_user, catalog=presto_catalog, schema='default') # get table list sql = "select table_schema, table_name, column_name, is_nullable, data_type" \ " from information_schema.columns" columns, rows = client.run(sql) if rows is None: rows = [] schemas = {} for row in rows: schema_name = row[0] table_name = row[1] column_name = row[2] is_nullable = row[3] column_type = row[4] if schema_name == "sys" or schema_name == "information_schema": # skip system schemas continue if len(schema_name) > PG_NAMEDATALEN - 1: plpy.warning("Schema %s is skipped because its name is longer than %d characters" % \ (plpy.quote_ident(schema_name), PG_NAMEDATALEN - 1)) continue tables = schemas.setdefault(schema_name, {}) if len(table_name) > PG_NAMEDATALEN - 1: plpy.warning("Table %s.%s is skipped because its name is longer than %d characters" % \ (plpy.quote_ident(schema_name), plpy.quote_ident(table_name), PG_NAMEDATALEN - 1)) continue columns = tables.setdefault(table_name, []) if len(column_name) > PG_NAMEDATALEN - 1: plpy.warning("Column %s.%s.%s is skipped because its name is longer than %d characters" % \ (plpy.quote_ident(schema_name), plpy.quote_ident(table_name), \ plpy.quote_ident(column_name), PG_NAMEDATALEN - 1)) continue columns.append(Column(column_name, column_type, is_nullable)) # drop all schemas excepting prestogres_catalog, information_schema and pg_% sql = "select n.nspname as schema_name from pg_catalog.pg_namespace n" \ " where n.nspname not in ('prestogres_catalog', 'information_schema')" \ " and n.nspname not like 'pg_%'" for row in plpy.cursor(sql): plpy.execute("drop schema %s cascade" % plpy.quote_ident(row["schema_name"])) # create schema and tables for schema_name, tables in sorted(schemas.items(), key=lambda (k,v): k): try: plpy.execute("create schema %s" % (plpy.quote_ident(schema_name))) except: # ignore error? pass # grant access on the all tables to the restricted user plpy.execute("grant select on all tables in schema %s to %s" % \ (plpy.quote_ident(schema_name), plpy.quote_ident(access_role))) for table_name, columns in sorted(tables.items(), key=lambda (k,v): k): column_names = [] column_types = [] not_nulls = [] for column in columns: column_names.append(column.name) column_types.append(_pg_table_type(column.type)) not_nulls.append(not column.nullable) # change columns create_sql = _build_create_table(schema_name, table_name, column_names, column_types, not_nulls) plpy.execute(create_sql) # update pg_database plpy.execute("update pg_database set datname=%s where datname=current_database()" % \ plpy.quote_literal(presto_catalog))
def get_schema(pcid, schemas, connection_string):
    """this function returns a pcschema object, either taking it from GD,
    or taking it from within the database, or taking it from outside the database"""
    # create_GD_if_not_exists()
    # create_schemas_if_not_exists()
    # trying to get the schema from GD
    if str(pcid) in schemas:
        # print "schema %s was in global dictionnary GD\n" % pcid
        return schemas[str(pcid)]  # if we get it, stop there

    # are we inside or outside the database (plpython or python)
    try:
        import plpy
        executing_in_postgres = True
    except ImportError:
        executing_in_postgres = False

    if executing_in_postgres == True:
        # use DBAPI to get the schema with given pcid
        # print "getting the schema of pcid : %s from within database (DBAPI)\n" % pcid
        # plpy.notice("getting the schema of pcid : " + str(pcid) + " from within database (DBAPI)\n")
        query = """SELECT pf.srid, pf.schema, srs.srtext
                   FROM pointcloud_formats as pf
                   LEFT OUTER JOIN public.spatial_ref_sys AS srs ON (srs.srid = pf.srid)
                   WHERE pcid = %d""" % pcid
        result_query = plpy.execute(query, 1)
        schema_xml = (result_query[0]['schema']).encode('utf-8')
        srid = int(result_query[0]['srid'])
        srtext = result_query[0]['srtext']
    else:
        # use psycopg2 api to get the schema
        import psycopg2
        # print "getting the schema of pcid : %s from outside database (PSYCOPG2)\n" % pcid
        conn = psycopg2.connect(connection_string)
        conn.set_client_encoding('utf-8')
        cur = conn.cursor()
        cur.execute(
            """SELECT pf.srid, convert_to(pf.schema,'UTF8') as schema, srs.srtext
               FROM pointcloud_formats as pf
               LEFT OUTER JOIN public.spatial_ref_sys AS srs ON (srs.srid = pf.srid)
               WHERE pcid = %s""", [pcid])
        result_query = cur.fetchone()
        schema_xml = unicode(str(result_query[1]), "UTF-8")
        srid = int(result_query[0])
        srtext = result_query[2]
        conn.commit()
        cur.close()
        conn.close()

    # both cases: create a pcschema object and store it
    pc_schema = pcschema()
    pc_schema.parsexml(schema_xml)
    pc_schema.pcid = pcid
    pc_schema.srid = srid
    pc_schema.srtext = srtext
    schemas[str(pcid)] = pc_schema
    return pc_schema
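# Hedged usage sketch for get_schema() above from inside a PL/Python function: GD is
# PL/Python's per-session global dictionary, so repeated calls for the same pcid
# reuse the parsed pcschema object instead of re-querying pointcloud_formats. The
# pcid value is illustrative; the connection string is only needed outside PostgreSQL.
schemas = GD.setdefault('pc_schemas', {})
schema = get_schema(1, schemas, '')
plpy.notice('pcid 1 has srid %s' % schema.srid)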
def get_moran(self, w_type, params):
    """fetch data for moran's i analyses"""
    query = pu.construct_neighbor_query(w_type, params)
    return plpy.execute(query)
def get_gwr_predict(self, params):
    """fetch data for gwr predict"""
    query = pu.gwr_predict_query(params)
    return plpy.execute(query)
batch_size = 1000
joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features])

try:
    cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a'.format(
        joined_features=joined_features, target_query=target_query))
except Exception, e:
    plpy.error('Failed to build segmentation model: %s' % e)

results = []
while True:
    rows = cursor.fetch(batch_size)
    if not rows:
        break
    batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows])
    # Need to fix this. Should be global mean. This will cause weird effects
    batch = replace_nan_with_mean(batch)
    prediction = model.predict(batch)
    results.append(prediction)

try:
    cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids
                                  FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
except Exception, e:
    plpy.error('Failed to build segmentation model: %s' % e)

return cartodb_ids, np.concatenate(results)
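# Minimal illustration of the plpy.cursor() batching pattern used above: a cursor
# streams rows from the query so the full result set never has to sit in memory,
# and fetch(batch_size) returns an empty result once the rows are exhausted.
cursor = plpy.cursor('select generate_series(1, 10000) as n')
total = 0
while True:
    rows = cursor.fetch(1000)
    if not rows:
        break
    total += sum(row['n'] for row in rows)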
def get_getis(self, w_type, params):
    """fetch data for getis ord's g"""
    query = pu.construct_neighbor_query(w_type, params)
    return plpy.execute(query)