def writeYAMLConfig(datasetId, path, newConfig):
    cache = getCache()
    cacheKey = 'config' + datasetId
    del cache[cacheKey]
    dataset_folder = join(sourceDir, datasetId)
    temp_settings_filename = ImpUtils.GetTempFileName()
    with open(temp_settings_filename, 'w') as temp_settings_file:
        temp_settings_file.write(newConfig)
    validators = {
        'settings': lambda path: (join(dataset_folder, 'settings'),
                                  SettingsDataset(temp_settings_filename, validate=True)),
        'genome': lambda path: (join(dataset_folder, 'refgenome', 'settings'),
                                SettingsRefGenome(temp_settings_filename, validate=True)),
        'tablesById': lambda path: (join(dataset_folder, 'datatables', path[0], 'settings'),
                                    SettingsDataTable(temp_settings_filename, validate=True)),
        'twoDTablesById': lambda path: (join(dataset_folder, '2D_datatables', path[0], 'settings'),
                                        SettingsDataTable(temp_settings_filename, validate=True)),
    }
    path = path.split('.')
    try:
        (settings_file, validator) = validators[path[0]](path[1:])
        # Validation happens in the validator constructor called in the lambda,
        # so if we get here without validation throwing an exception we can copy
        # the new settings over the old ones.
        os.system('mv %s %s' % (temp_settings_filename, settings_file))
    finally:
        try:
            os.remove(temp_settings_filename)
        except OSError:
            pass
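# A minimal sketch of the write-to-temp, validate, then swap pattern used by
# writeYAMLConfig above, with os.replace standing in for the shelled-out 'mv'.
# safe_write_config, validate_config and target_path are hypothetical names for
# illustration; os.replace is only atomic when both paths are on the same filesystem.
import os
import tempfile


def safe_write_config(target_path, new_config, validate_config):
    fd, temp_path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as temp_file:
            temp_file.write(new_config)
        validate_config(temp_path)          # raises if the new settings are invalid
        os.replace(temp_path, target_path)  # only reached when validation passed
    finally:
        if os.path.exists(temp_path):       # clean up if validation or the swap failed
            os.remove(temp_path)
    return target_path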
def link(self, words):
    print('Preprocessing Input...')
    processedWords = self.preprocess(words)
    print('Caching Possible Wikipedia Pages For Faster Runtime...')
    c = cache.getCache(processedWords)
    print('Linking Initial Input...')
    searchAgent = LocalSearch(processedWords, c)
    result = searchAgent.runLocalSearch(self.alpha, self.iterations)
    # print(self.prettify(result))
    print(result)
    return result
def getJSONConfig(datasetId, cache=True):
    if cache:
        cache = getCache()
        cacheKey = 'config' + datasetId
        try:
            result = cache[cacheKey]
        except KeyError:
            result = readJSONConfig(datasetId)
            cache.set(cacheKey, result, expire=5 * 60)
    else:
        result = readJSONConfig(datasetId)
    return result
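# A minimal, self-contained sketch of the cache-aside pattern used by getJSONConfig
# above, assuming a mapping-style cache (such as diskcache.Cache) that raises KeyError
# on a miss and supports set(key, value, expire=seconds). cached_read, fetch_config and
# CONFIG_TTL are hypothetical names introduced only for this illustration.
CONFIG_TTL = 5 * 60  # seconds


def cached_read(cache, key, fetch_config, ttl=CONFIG_TTL):
    try:
        return cache[key]                  # fast path: serve from cache
    except KeyError:
        value = fetch_config()             # slow path: rebuild the value
        cache.set(key, value, expire=ttl)  # store with an expiry so stale config ages out
        return value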
def writeJSONConfig(datasetId, action, path, newConfig):
    cache = getCache()
    cacheKey = 'config' + datasetId
    del cache[cacheKey]
    dataset_folder = join(sourceDir, datasetId)
    # We have a path in the combined JSON object - we now follow the path until we hit a
    # subset confined to one YAML handler.
    writers = {
        'settings': lambda path: (path, SettingsDataset(join(dataset_folder, 'settings'), validate=True)),
        'chromosomes': lambda path: (path, ReadOnlyErrorWriter('chromosomes')),
        'tablesById': lambda path: (path[1:], SettingsDataTable(join(dataset_folder, 'datatables', path[0], 'settings'), validate=True)),
        'twoDTablesById': lambda path: (path[1:], SettingsDataTable(join(dataset_folder, '2D_datatables', path[0], 'settings'), validate=True)),
        'genome': lambda path: (path, ReadOnlyErrorWriter('genome')),  # For now, as this will likely get a refactor
        'mapLayers': lambda path: (path, ReadOnlyErrorWriter('mapLayers')),  # For now, as this will likely get a refactor
        'docs': lambda path: (path, DocsWriter(datasetId)),
    }
    path = path.split('.')
    (path, writer) = writers[path[0]](path[1:])
    return writer.updateAndWriteBack(action, path, newConfig, validate=True)
def proxyGET(subpath):
    # Use the file extension to decide whether the local cache should be used.
    cache_type = os.path.splitext(subpath)[1]
    if cache_type in ('.png', '.mp3', '.json'):
        # First check whether the server-side cache entry is still valid.
        cache_flag = cache.checkCacheServer(
            subpath, request.args.get('version', default=None))
        if cache_flag > 0:
            # Pass cache_flag into download; when revalidation is needed it adds If-Modified-Since.
            resp = download(request, cache_flag)
            cache_byte = resp.data
        # Use the local cache when it is still valid, or when revalidation returned 304.
        if cache_flag == 0 or resp.status_code == 304:
            # If the browser sent If-Modified-Since and it matches the cached content,
            # answer 304 before touching the cached bytes.
            if 'If-Modified-Since' in request.headers:
                last_modified = request.headers.get('If-Modified-Since')
                if cache.checkCacheBrowser(subpath, last_modified):
                    return Response('', 304, cache_headers)
            app.logger.debug('Serving from local cache: %s', subpath)
            cache_byte, last_modified = cache.getCache(subpath)
        else:
            last_modified = resp.headers['Last-Modified']
            cache_json = {
                'deadline': str(time.time() + 2592000),
                'version': request.args.get('version', default=None),
                'last_modified': last_modified,
            }
            cache.setCache(cache_byte, subpath, cache_json)
        # Fill in the response headers based on the file extension.
        cache_headers.update(Content_Types[cache_type])
        cache_headers['Content-Length'] = len(cache_byte)
        cache_headers['Last-Modified'] = last_modified
        return Response(cache_byte, 200, cache_headers)
    else:
        app.logger.debug('Forwarding GET: %s', request.url)
        return transmitGET(request)
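# A minimal sketch of the conditional-GET decision made in proxyGET above, using a
# Flask-style Response. conditional_response is a hypothetical helper; real code would
# compare HTTP-date strings properly (e.g. via email.utils.parsedate_to_datetime)
# rather than by raw string equality.
from flask import Response


def conditional_response(body, last_modified, if_modified_since=None):
    headers = {'Last-Modified': last_modified}
    if if_modified_since is not None and if_modified_since == last_modified:
        # The client's copy is still current: answer 304 with no body.
        return Response('', 304, headers)
    headers['Content-Length'] = str(len(body))
    return Response(body, 200, headers)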
def response(returndata):
    url = returndata['url']
    cache = getCache()
    cacheKey = json.dumps([url])
    returndata['content'] = None
    use_cache = returndata['cache'] and not os.getenv('STAGING', '')
    if use_cache:
        try:
            returndata['content'] = cache[cacheKey]
        except KeyError:
            pass
    if returndata['content'] is None:
        file = urllib.request.urlopen(url)
        data = file.read()
        file.close()
        data = xmltodict.parse(data)
        returndata['content'] = DQXbase64.b64encode_var2(json.dumps(data))
        if use_cache:
            cache[cacheKey] = returndata['content']
    return returndata
""" Things remaining: 1. Renames with no content change. Tricky. """ CC_LSH = ['lsh', '-fmt', '%o%m|%Nd|%u|%En|%Vn|'+cc.getCommentFmt()+'\\n', '-recurse'] DELIM = '|' ARGS = { 'stash': 'Wraps the rebase in a stash to avoid file changes being lost', 'dry_run': 'Prints a list of changesets to be imported', 'lshistory': 'Prints the raw output of lshistory to be cached for load', 'load': 'Loads the contents of a previously saved lshistory file', } cache = getCache() def main(stash=False, dry_run=False, lshistory=False, load=None): validateCC() if not (stash or dry_run or lshistory): checkPristine() since = getSince() cache.start() if load: history = open(load, 'r').read().decode(ENCODING) else: cc.rebase() history = getHistory(since) write(join(GIT_DIR, '.git', 'lshistory.bak'), history.encode(ENCODING)) if lshistory: print(history)
""" CC_LSH = [ 'lsh', '-fmt', '%o%m|%Nd|%u|%En|%Vn|' + cc.getCommentFmt() + '\\n', '-recurse' ] DELIM = '|' ARGS = { 'stash': 'Wraps the rebase in a stash to avoid file changes being lost', 'dry_run': 'Prints a list of changesets to be imported', 'lshistory': 'Prints the raw output of lshistory to be cached for load', 'load': 'Loads the contents of a previously saved lshistory file', } cache = getCache() def main(stash=False, dry_run=False, lshistory=False, load=None): validateCC() if not (stash or dry_run or lshistory): checkPristine() since = getSince() cache.start() if load: history = open(load, 'r').read().decode(ENCODING) else: cc.rebase() history = getHistory(since) write(join(GIT_DIR, '.git', 'lshistory.bak'), history.encode(ENCODING)) if lshistory:
def handler(start_response, requestData):
    try:
        length = int(requestData['environ'].get('CONTENT_LENGTH', '0'))
    except ValueError:
        length = 0
    content = requestData['environ']['wsgi.input'].read(length).decode("utf-8")
    content = json.loads(content) if len(content) > 0 else None
    if not content:
        raise SyntaxError('No query parameters supplied')
    database = content['database']
    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(requestData)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(database))
    tableId = content['table']
    query = content['query']
    orderBy = json.loads(content.get('orderBy', '[]'))
    distinct = content.get('distinct', 'false') == 'true'
    rawColumns = json.loads(content['columns'])
    columns = list(map(decode, rawColumns))
    groupBy = content.get('groupBy', None)
    startRow, endRow = None, None
    if content.get('limit', False):
        startRow, endRow = content['limit'].split('~')
        startRow = int(startRow)
        endRow = int(endRow)
        if startRow < 0:
            startRow = 0
        if endRow <= startRow:
            endRow = startRow + 1
    randomSample = None
    if content.get('randomSample', False):
        randomSample = int(content['randomSample'])
    cacheData = content.get('cache', True)
    joins = json.loads(content.get('joins', '[]'))
    auth_query = credentials.get_auth_query(
        database, [join['foreignTable'] for join in joins] + [tableId])
    cache = getCache()
    cacheKey = json.dumps([tableId, query, orderBy, distinct, columns, groupBy,
                           database, startRow, endRow, joins, auth_query])
    data = None
    if cacheData and randomSample is None:  # Don't serve cache on random sample!!
        try:
            data = cache[cacheKey]
        except KeyError:
            pass
    if data is None:
        with DQXDbTools.DBCursor(requestData, database, read_timeout=config.TIMEOUT) as cur:
            whereClause = DQXDbTools.WhereClause()
            whereClause.ParameterPlaceHolder = '%s'
            whereClause.Decode(query, True)
            if auth_query:
                whereClause.query = {
                    "whcClass": "compound",
                    "isCompound": True,
                    "isRoot": True,
                    "Components": [whereClause.query, auth_query],
                    "Tpe": "AND"
                }
            whereClause.CreateSelectStatement()
            sqlQuery = "SELECT "
            if distinct:
                sqlQuery += " DISTINCT "
            sqlQuery += "{0} FROM {1}".format(','.join(columns), DBTBESC(tableId))
            for join in joins:
                if 'type' in join and join['type'] in ['', 'INNER', 'LEFT', 'RIGHT', 'FULL']:
                    sqlQuery += " {0} JOIN {1} ON {2} = {3}".format(
                        join['type'].upper(),
                        DBTBESC(join['foreignTable']),
                        DBCOLESC(join['foreignColumn']),
                        DBCOLESC(join['column']))
                else:
                    raise SyntaxError('Join type not valid')
            if len(whereClause.querystring_params) > 0:
                sqlQuery += " WHERE {0}".format(whereClause.querystring_params)
            if groupBy and len(groupBy) > 0:
                sqlQuery += " GROUP BY " + ','.join(map(DBCOLESC, groupBy.split('~')))
            if len(orderBy) > 0:
                sqlQuery += " ORDER BY {0}".format(
                    ','.join([DBCOLESC(col) + ' ' + direction for direction, col in orderBy]))
            if startRow is not None and endRow is not None:
                sqlQuery += " LIMIT {0} OFFSET {1}".format(endRow - startRow + 1, startRow)
            if randomSample is not None:
                sqlQuery += " SAMPLE {0}".format(randomSample)
            if DQXDbTools.LogRequests:
                DQXUtils.LogServer('###QRY:' + sqlQuery)
                DQXUtils.LogServer('###PARAMS:' + str(whereClause.queryparams))
            cur.execute(sqlQuery, whereClause.queryparams)
            rows = cur.fetchall()
            result = {}
            for rawCol, (i, desc) in zip(rawColumns, enumerate(cur.description)):
                # Figure out the name we should return for the column - by default monet doesn't qualify names
                col_name = name(rawCol, desc[0])
                dtype = desciptionToDType(desc[1])
                if dtype in ['i1', 'i2', 'i4', 'S']:
                    null_value = NULL_VALUES[dtype]
                    result[col_name] = np.array(
                        [(str(row[i]).encode('utf-8') if dtype == 'S' else row[i])
                         if row[i] is not None else null_value
                         for row in rows],
                        dtype=dtype)
                else:
                    result[col_name] = np.array([row[i] for row in rows], dtype=dtype)
            data = gzip(data=b''.join(arraybuffer.encode_array_set(list(result.items()))))
            if cacheData:
                cache[cacheKey] = data
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')]
    start_response(status, response_headers)
    yield data
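# A minimal sketch of the NULL-substitution step used in the handler above: SQL NULLs
# cannot be stored in fixed-dtype numpy arrays, so each typed column swaps None for a
# per-dtype sentinel before packing. The sentinel values below are illustrative
# assumptions, not the project's actual NULL_VALUES table.
import numpy as np

EXAMPLE_NULL_VALUES = {'i1': -128, 'i2': -32768, 'i4': -2147483648, 'S': b''}


def pack_column(values, dtype):
    null_value = EXAMPLE_NULL_VALUES[dtype]
    return np.array([v if v is not None else null_value for v in values], dtype=dtype)


# e.g. pack_column([1, None, 3], 'i1') -> array([1, -128, 3], dtype=int8)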
def index_table_query(dataset, cur, table, fields, query, auth_query, order,
                      limit, offset, fail_limit, index_field, sample):
    if limit and fail_limit:
        raise Exception("Only one type of limit can be specified")
    where = DQXDbTools.WhereClause()
    where.ParameterPlaceHolder = '%s'  # NOTE!: MySQL PyODBC seems to require this nonstandard coding
    where.Decode(query)
    if auth_query:
        where.query = {
            "whcClass": "compound",
            "isCompound": True,
            "isRoot": True,
            "Components": [where.query, auth_query],
            "Tpe": "AND"
        }
    where.CreateSelectStatement()
    if index_field not in fields:
        fields.append(index_field)
    if len(where.querystring_params) > 0:
        query = "WHERE " + where.querystring_params + ' AND ' + \
            DQXDbTools.ToSafeIdentifier(index_field) + ' IS NOT NULL'
    else:
        query = "WHERE " + DQXDbTools.DBCOLESC(index_field) + ' IS NOT NULL'
    fields_string = ','.join('"' + DQXDbTools.ToSafeIdentifier(f) + '"' for f in fields)
    table = DQXDbTools.ToSafeIdentifier(table)
    sqlquery = 'SELECT {fields_string} FROM "{table}" {query}'.format(**locals())
    if order:
        sqlquery += ' ORDER BY "{0}"'.format(DQXDbTools.ToSafeIdentifier(order))
    params = where.queryparams
    # Set the limit to one past the requested limit.
    limit = limit or fail_limit
    if limit:
        sqlquery += ' LIMIT %s'
        params.append(int(limit))
    if offset:
        sqlquery += ' OFFSET %s'
        params.append(int(offset))
    if sample is not None:
        sqlquery += ' SAMPLE {0}'.format(sample)
    cache = getCache()
    cacheKey = json.dumps([sqlquery, params])
    rows, description = None, None
    try:
        rows, description = cache[cacheKey]
    except KeyError:
        print('2D', sqlquery, params)
    if rows is None:
        cur.execute(sqlquery, params)
        rows = cur.fetchall()
        description = cur.description
        # We cache even if a random sample is requested, so that requests at different
        # points on the col axis pick the same rows and vice versa - i.e. we always want
        # the same random sample.
        cache[cacheKey] = [rows, description]
    data = {}
    for i, (field, desc) in enumerate(zip(fields, description)):
        dtype = desciptionToDType(desc[1])
        data[field] = np.array([row[i] for row in rows], dtype=dtype)
    return data
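# A minimal sketch of the result-memoisation used by index_table_query above: the cache
# key is the SQL text plus its parameters, so identical queries (including sampled ones)
# always return the same rows. cached_query and the mapping-style cache are assumptions
# for illustration; params must be JSON-serialisable for this key scheme to work.
import json


def cached_query(cache, cur, sqlquery, params):
    cache_key = json.dumps([sqlquery, params])
    try:
        return cache[cache_key]
    except KeyError:
        cur.execute(sqlquery, params)
        result = (cur.fetchall(), cur.description)
        cache[cache_key] = result
        return result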
def handler(start_response, request_data):
    datatable = request_data['table']
    dataset = request_data['dataset']
    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(request_data)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(dataset))
    two_d_properties = request_data['2DProperties'].split('~')
    col_properties = request_data['colProperties'].split('~')
    row_properties = request_data['rowProperties'].split('~')
    col_qry = request_data['colQry']
    col_order = request_data['colOrder']
    row_qry = request_data['rowQry']
    row_order = request_data.get('rowOrder', None)
    row_order_columns = []
    if row_order == 'columns':
        try:
            row_order_columns = request_data['rowSortCols'].split('~')
        except KeyError:
            pass
        row_order = None
    try:
        col_limit = int(request_data['colLimit'])
    except KeyError:
        col_limit = None
    try:
        row_limit = int(request_data['rowLimit'])
    except KeyError:
        row_limit = None
    try:
        col_offset = int(request_data['colOffset'])
    except KeyError:
        col_offset = None
    try:
        row_offset = int(request_data['rowOffset'])
    except KeyError:
        row_offset = None
    # Set the fail limit to one past so we know if we hit it.
    try:
        col_fail_limit = int(request_data['colFailLimit']) + 1
    except KeyError:
        col_fail_limit = None
    try:
        row_sort_property = request_data['rowSortProperty']
    except KeyError:
        row_sort_property = None
    try:
        col_key = request_data['colKey']
    except KeyError:
        col_key = None
    try:
        sort_mode = request_data['sortMode']
    except KeyError:
        sort_mode = None
    try:
        row_random_sample = int(request_data['rowRandomSample'])
    except KeyError:
        row_random_sample = None
    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)
    with DQXDbTools.DBCursor(request_data, dataset, read_timeout=config.TIMEOUT) as cur:
        col_tablename, row_tablename = get_table_ids(cur, dataset, datatable)
        col_auth_query = credentials.get_auth_query(dataset, [col_tablename])
        row_auth_query = credentials.get_auth_query(dataset, [row_tablename])
    cache = getCache()
    cache_key = json.dumps([
        datatable, dataset, two_d_properties, col_properties, row_properties,
        col_qry, col_order, row_qry, row_order, row_order_columns,
        row_random_sample, col_limit, row_limit, col_offset, row_offset,
        col_fail_limit, row_sort_property, col_key, sort_mode,
        col_auth_query, row_auth_query
    ])
    data = None
    try:
        data = cache[cache_key]
    except KeyError:
        print('2D Cache miss')
    if data is None:
        with DQXDbTools.DBCursor(request_data, dataset, read_timeout=config.TIMEOUT) as cur:
            col_result = index_table_query(dataset, cur, col_tablename,
                                           col_properties, col_qry, col_auth_query,
                                           col_order, col_limit, col_offset,
                                           col_fail_limit, col_index_field, None)
            if len(row_order_columns) > 0:
                # If we are sorting by 2D data then we need to grab all the rows,
                # as the limit applies post-sort.
                row_result = index_table_query(dataset, cur, row_tablename,
                                               row_properties, row_qry, row_auth_query,
                                               row_order, None, None, None,
                                               row_index_field, row_random_sample)
            else:
                row_result = index_table_query(dataset, cur, row_tablename,
                                               row_properties, row_qry, row_auth_query,
                                               row_order, row_limit, row_offset, None,
                                               row_index_field, row_random_sample)
            col_idx = col_result[col_index_field]
            row_idx = row_result[row_index_field]
            del col_result[col_index_field]
            del row_result[row_index_field]
            if len(col_idx) == col_fail_limit:
                result_set = [('_over_col_limit', np.array([0], dtype='i1'))]
                for name, array in list(row_result.items()):
                    result_set.append((('row_' + name), array))
            else:
                if len(row_order_columns) > 0 and len(row_idx) > 0:
                    # Translate primary keys to column indexes.
                    sqlquery = 'SELECT "{col_field}", "{idx_field}" FROM "{table}" WHERE "{col_field}" IN ({params})'.format(
                        idx_field=DQXDbTools.ToSafeIdentifier(col_index_field),
                        table=DQXDbTools.ToSafeIdentifier(col_tablename),
                        params="'" + "','".join(map(DQXDbTools.ToSafeIdentifier, row_order_columns)) + "'",
                        col_field=DQXDbTools.ToSafeIdentifier(col_key))
                    cur.execute(sqlquery)
                    idx_for_col = dict((k, v) for k, v in cur.fetchall())
                    # Sort by the order specified - reverse so the last clicked is the major sort.
                    sort_col_idx = list(reversed([idx_for_col[key] for key in row_order_columns]))
                    # Grab the data needed to sort.
                    sort_data = extract2D(dataset, datatable, row_idx, sort_col_idx, [row_sort_property])
                    rows = list(zip(row_idx, sort_data[row_sort_property]))
                    if sort_mode == 'call':
                        polyploid_key_func = lambda row: ''.join(summarise_call(calls) for calls in row[1])
                        haploid_key_func = lambda row: ''.join([str(c).zfill(2) for c in row[1]])
                        if len(rows[0][1].shape) == 1:
                            rows.sort(key=haploid_key_func, reverse=True)
                        else:
                            rows.sort(key=polyploid_key_func, reverse=True)
                    elif sort_mode == 'fraction':
                        for i in range(len(sort_col_idx)):
                            # TODO: Should be some fancy bayesian shizzle
                            def key_func(row):
                                if sum(row[1][i]) == 0:
                                    return '-1'
                                return str(1 - float(row[1][i][0]) / sum(row[1][i])) + str(sum(row[1][i])).zfill(4)
                            rows.sort(key=key_func, reverse=True)
                    else:
                        print("Unimplemented sort_mode")
                    row_pos_for_idx = dict(list(zip(row_idx, list(range(len(row_idx))))))
                    # Now just get the row_idx to pass to the 2D extract for the slice we need.
                    row_idx = np.array(list(map(itemgetter(0), rows))[row_offset:row_offset + row_limit])
                    # Use this row idx to retrieve the row data from the initial query.
                    for name, array in list(row_result.items()):
                        row_result[name] = array[[row_pos_for_idx[idx] for idx in row_idx]]
                two_d_result = extract2D(dataset, datatable, row_idx, col_idx, two_d_properties)
                result_set = []
                for name, array in list(col_result.items()):
                    result_set.append((('col_' + name), array))
                for name, array in list(row_result.items()):
                    result_set.append((('row_' + name), array))
                for name, array in list(two_d_result.items()):
                    result_set.append((('2D_' + name), array))
            data = gzip(data=b''.join(arraybuffer.encode_array_set(result_set)))
            cache[cache_key] = data
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')]
    start_response(status, response_headers)
    yield data
def ImportDataSet(calculationObject, baseFolder, datasetId, importSettings):
    with calculationObject.LogHeader('Importing dataset {0}'.format(datasetId)):
        calculationObject.Log('Import settings: ' + str(importSettings))
        datasetFolder = join(baseFolder, datasetId)
        # MonetDB doesn't allow renames of non-empty schemas, so we import into a randomly
        # named schema and then set it as the user's default.
        schema = ''.join(random.choice(string.ascii_letters) for i in range(10))
        dao = SettingsDAO(calculationObject, datasetId, schema=schema)
        if not importSettings['ConfigOnly']:
            calculationObject.SetInfo('Creating database')
            dao.createDatabase()
            # Creating new database
            scriptPath = os.path.dirname(os.path.realpath(__file__))
            dao.loadFile(scriptPath + "/createdataset.sql")
            dao.setDatabaseVersion(schemaversion.major, schemaversion.minor)
        else:
            # Raises an exception if not present
            dao.isDatabasePresent()
            # Verify that the major schema version is OK - otherwise we can't do a
            # config-only update.
            currentVersion = dao.getCurrentSchemaVersion()
            if currentVersion[0] < schemaversion.major:
                raise Exception(
                    "The database schema of this dataset is outdated. Update it by running "
                    "a full data import or a top N preview import.")
        # dao.clearDatasetCatalogs()
        modules = PluginLoader(calculationObject, datasetId, importSettings, dao=dao)
        modules.importAll('pre')
        importer = ImportDataTable(calculationObject, datasetId, importSettings,
                                   baseFolder=baseFolder, dao=dao)
        importer.importAllDataTables()
        import2D = Import2DDataTable(calculationObject, datasetId, importSettings,
                                     baseFolder, dataDir='2D_datatables', dao=dao)
        import2D.importAll2DTables()
        globalSettings = importer._globalSettings
        if ImportRefGenome.ImportRefGenome(calculationObject, datasetId, baseFolder,
                                           importSettings, dao):
            globalSettings['hasGenomeBrowser'] = True
        ImportDocs(calculationObject, datasetFolder, datasetId)
        ImportMaps(calculationObject, datasetFolder, datasetId)
        ImportCustomComponents(calculationObject, datasetFolder, datasetId)
        # Swap the live default schema
        dao._execSql('ALTER USER monetdb SET SCHEMA "%s"' % (schema))
        # Move the config files to live
        config = PanoptesConfig(calculationObject)
        try:
            os.rename(join(config.getBaseDir(), 'config', '_import_' + datasetId),
                      join(config.getBaseDir(), 'config', datasetId))
        except OSError:
            # Not atomic, but I can't see how to make it atomic easily
            shutil.rmtree(join(config.getBaseDir(), 'config', datasetId))
            os.rename(join(config.getBaseDir(), 'config', '_import_' + datasetId),
                      join(config.getBaseDir(), 'config', datasetId))
        # Finalise: register dataset
        with calculationObject.LogHeader('Registering dataset'):
            dao.registerDataset(globalSettings['name'], importSettings['ConfigOnly'])
        with calculationObject.LogHeader('Clear cache'):
            getCache().clear()
        for old_schema in dao._execSqlQuery(
                "SELECT name FROM sys.schemas WHERE system=False AND name<>%s", schema):
            dao._execSql('DROP SCHEMA "%s" CASCADE' % (old_schema))
        modules.importAll('post')
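# A minimal sketch of the "move the freshly imported config directory over the live one"
# step in ImportDataSet above. os.rename fails when the destination already exists as a
# non-empty directory, so the existing directory is removed first and the rename retried;
# as the original comment notes, this is not atomic. promote_config_dir and its paths are
# illustrative names, not part of the project.
import os
import shutil


def promote_config_dir(import_dir, live_dir):
    try:
        os.rename(import_dir, live_dir)
    except OSError:
        # Destination already exists: clear it, then retry the rename.
        shutil.rmtree(live_dir)
        os.rename(import_dir, live_dir)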
def handler(start_response, requestData):
    try:
        length = int(requestData['environ'].get('CONTENT_LENGTH', '0'))
    except ValueError:
        length = 0
    content = requestData['environ']['wsgi.input'].read(length).decode("utf-8")
    content = json.loads(content) if len(content) > 0 else None
    if not content:
        raise SyntaxError('No query parameters supplied')
    database = content['database']
    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(requestData)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(database))
    tableId = content['table']
    query = content['query']
    orderBy = json.loads(content.get('orderBy', '[]'))
    distinct = content.get('distinct', 'false') == 'true'
    rawColumns = json.loads(content['columns'])
    columns = list(map(decode, rawColumns))
    groupBy = content.get('groupBy', None)
    startRow, endRow = None, None
    if content.get('limit', False):
        startRow, endRow = content['limit'].split('~')
        startRow = int(startRow)
        endRow = int(endRow)
        if startRow < 0:
            startRow = 0
        if endRow <= startRow:
            endRow = startRow + 1
    randomSample = None
    if content.get('randomSample', False):
        randomSample = int(content['randomSample'])
    cacheData = content.get('cache', True)
    joins = json.loads(content.get('joins', '[]'))
    auth_query = credentials.get_auth_query(
        database, [join['foreignTable'] for join in joins] + [tableId])
    cache = getCache()
    cacheKey = json.dumps([tableId, query, orderBy, distinct, columns, groupBy,
                           database, startRow, endRow, joins, auth_query])
    data = None
    if cacheData and randomSample is None:  # Don't serve cache on random sample!!
        try:
            data = cache[cacheKey]
        except KeyError:
            pass
    if data is None:
        with DQXDbTools.DBCursor(requestData, database, read_timeout=config.TIMEOUT) as cur:
            whereClause = DQXDbTools.WhereClause()
            whereClause.ParameterPlaceHolder = '%s'
            whereClause.Decode(query, True)
            if auth_query:
                whereClause.query = {
                    "whcClass": "compound",
                    "isCompound": True,
                    "isRoot": True,
                    "Components": [whereClause.query, auth_query],
                    "Tpe": "AND"
                }
            whereClause.CreateSelectStatement()
            sqlQuery = "SELECT "
            if distinct:
                sqlQuery += " DISTINCT "
            sqlQuery += "{0} FROM {1}".format(','.join(columns), DBTBESC(tableId))
            for join in joins:
                if 'type' in join and join['type'] in ['', 'INNER', 'LEFT', 'RIGHT', 'FULL']:
                    sqlQuery += " {0} JOIN {1} ON {2} = {3}".format(
                        join['type'].upper(),
                        DBTBESC(join['foreignTable']),
                        DBCOLESC(join['foreignColumn']),
                        DBCOLESC(join['column']))
                else:
                    raise SyntaxError('Join type not valid')
            if len(whereClause.querystring_params) > 0:
                sqlQuery += " WHERE {0}".format(whereClause.querystring_params)
            if groupBy and len(groupBy) > 0:
                sqlQuery += " GROUP BY " + ','.join(map(DBCOLESC, groupBy.split('~')))
            if len(orderBy) > 0:
                sqlQuery += " ORDER BY {0}".format(
                    ','.join([DBCOLESC(col) + ' ' + direction for direction, col in orderBy]))
            if startRow is not None and endRow is not None:
                sqlQuery += " LIMIT {0} OFFSET {1}".format(endRow - startRow + 1, startRow)
            if randomSample is not None:
                sqlQuery += " SAMPLE {0}".format(randomSample)
            if DQXDbTools.LogRequests:
                DQXUtils.LogServer('###QRY:' + sqlQuery)
                DQXUtils.LogServer('###PARAMS:' + str(whereClause.queryparams))
            cur.execute(sqlQuery, whereClause.queryparams)
            rows = cur.fetchall()
            result = {}
            for rawCol, (i, desc) in zip(rawColumns, enumerate(cur.description)):
                # Figure out the name we should return for the column - by default monet doesn't qualify names
                col_name = name(rawCol, desc[0])
                dtype = desciptionToDType(desc[1])
                if dtype in ['i1', 'i2', 'i4', 'S']:
                    null_value = NULL_VALUES[dtype]
                    result[col_name] = np.array(
                        [(row[i].encode('ascii', 'replace') if dtype == 'S' else row[i])
                         if row[i] is not None else null_value
                         for row in rows],
                        dtype=dtype)
                elif desc[1] == 'timestamp':
                    result[col_name] = np.array(
                        [datetimeToJulianDay(row[i]) if row[i] is not None else None
                         for row in rows],
                        dtype=dtype)
                else:
                    result[col_name] = np.array([row[i] for row in rows], dtype=dtype)
            data = gzip(data=b''.join(arraybuffer.encode_array_set(list(result.items()))))
            if cacheData:
                cache[cacheKey] = data
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')]
    start_response(status, response_headers)
    yield data