def getIndex(dbName, tblName, chunkId=None):
    """
    Return index data (array of (objectId, chunkId, subChunkId) tuples).

    This only works on partitioned tables and is only supposed to be used
    with the director table (but there is currently no check that the table
    is a director table).

    Accepts one optional parameter 'columns' which specifies a comma-separated
    list of three column names. Default column names are "objectId", "chunkId",
    "subChunkId". The result returns columns in the same order as they are
    specified in the 'columns' argument.
    """
    _log.debug('request: %s', request)
    _log.debug('GET => get index')

    # validate parameters
    _validateDbName(dbName)
    _validateTableName(tblName)
    if chunkId is not None and chunkId < 0:
        raise ExceptionResponse(400, "InvalidArgument",
                                "Chunk ID argument is negative")

    # get column names and validate
    columns = request.args.get('columns', "objectId,chunkId,subChunkId").strip()
    columns = columns.split(',')
    if len(columns) != 3:
        raise ExceptionResponse(400, "InvalidArgument",
                                "'columns' parameter requires comma-separated list "
                                "of three column names")

    # check that table exists
    dbConn = Config.instance().dbEngine().connect()
    if not utils.tableExists(dbConn, tblName, dbName):
        raise ExceptionResponse(404, "TableMissing",
                                "Table %s.%s does not exist" % (dbName, tblName))

    # regexp matching chunk table names (but not overlap tables).
    # TODO: we need some central location for things like this
    tblRe = re.compile('^' + tblName + '_([0-9]+)$')
    tables = []
    for table in utils.listTables(dbConn, dbName):
        match = tblRe.match(table)
        if match is not None:
            if chunkId is not None:
                if chunkId == int(match.group(1)):
                    tables.append(table)
                    break  # only one table can match
            else:
                tables.append(table)

    # we expect at least one chunk table to be found
    if not tables:
        _log.error('No matching chunk tables found for table %s.%s chunkId=%s',
                   dbName, tblName, chunkId)
        raise ExceptionResponse(404, "NoMatchingChunks",
                                "Failed to find any chunk data table",
                                "No matching chunks for table %s.%s chunkId=%s" %
                                (dbName, tblName, chunkId))

    _log.debug("tables to scan: %s", tables)

    # TODO: list of lists is probably not the most efficient storage
    result = []
    for table in tables:
        query = "SELECT {0}, {1}, {2} FROM {3}.{4}"
        query = query.format(columns[0], columns[1], columns[2], dbName, table)
        _log.debug('query: %s', query)

        allRows = dbConn.execute(query)

        # build result description from cursor metadata if available,
        # otherwise fall back to the requested column names
        if allRows.keys():
            descr = [dict(name=d[0], type=utils.typeCode2Name(dbConn, d[1]))
                     for d in allRows.cursor.description]
        else:
            descr = [dict(name=name) for name in columns]
        _log.debug("description: %s", descr)

        # fetch in large batches to limit per-row overhead
        while True:
            rows = allRows.fetchmany(1000000)
            if not rows:
                break
            for row in rows:
                result.append(tuple(row))

    _log.debug("retrieved %d index rows", len(result))

    return json.jsonify(result=dict(rows=result, description=descr))
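
# A minimal client-side sketch showing how the endpoint above is meant to be used.
# It is illustrative only: the base URL and the "/dbs/<db>/tables/<table>/index"
# path are assumptions (the real route registration for getIndex() is defined
# elsewhere in this module), and fetchIndex() is a hypothetical helper, not part of
# the service API.  It demonstrates the 'columns' query parameter and the shape of
# the JSON payload produced above (result.rows / result.description).
def fetchIndex(baseUrl, dbName, tblName, columns=("objectId", "chunkId", "subChunkId")):
    """Fetch director-table index rows; return (columnNames, rows)."""
    import requests  # kept local so the sketch stays self-contained

    url = "%s/dbs/%s/tables/%s/index" % (baseUrl, dbName, tblName)  # hypothetical path
    resp = requests.get(url, params={"columns": ",".join(columns)})
    resp.raise_for_status()

    payload = resp.json()["result"]
    # columns come back in the same order they were requested in 'columns'
    names = [col["name"] for col in payload["description"]]
    rows = [tuple(row) for row in payload["rows"]]
    return names, rows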