예제 #1
0
def handler(start_response, request_data):
    """Serve a gzip-compressed 2D data slice together with row/column metadata.

    The column and row index tables of ``datatable`` are queried in the
    database, the requested 2D properties are read from the dataset's HDF5
    file, and everything is encoded into one arraybuffer payload.

    Args:
        start_response: WSGI ``start_response`` callable.
        request_data: Mapping providing ``datatable``, ``dataset``,
            ``2D_properties``, ``col_properties`` and ``row_properties``
            (``~``-separated lists), ``col_qry``, ``col_order``, ``row_qry``,
            ``row_order`` and ``first_dimension``. It is also passed to
            ``DQXDbTools.ParseCredentialInfo``.

    Yields:
        The gzip-compressed response body (a single chunk).

    Raises:
        KeyError: If one of the required keys is missing from ``request_data``.
    """
    datatable = request_data['datatable']
    dataset = request_data['dataset']
    two_d_properties = request_data['2D_properties'].split('~')
    col_properties = request_data['col_properties'].split('~')
    row_properties = request_data['row_properties'].split('~')
    col_qry = request_data['col_qry']
    col_order = request_data['col_order']
    row_qry = request_data['row_qry']
    row_order = request_data['row_order']
    first_dimension = request_data['first_dimension']

    # The index columns are fetched together with the requested properties and
    # stripped from the metadata again before the response is built.
    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)

    db = DQXDbTools.OpenDatabase(DQXDbTools.ParseCredentialInfo(request_data), dataset)
    try:
        col_table, row_table = get_table_names(db, datatable)
        col_result = index_table_query(db, col_table, col_properties, col_qry, col_order)
        row_result = index_table_query(db, row_table, row_properties, row_qry, row_order)
    finally:
        # Release the connection even when one of the queries fails.
        db.close()

    col_idx = col_result.pop(col_index_field)
    row_idx = row_result.pop(row_index_field)

    hdf5_path = os.path.join(config.BASEDIR, '2D_data', dataset + '_' + datatable + '.hdf5')
    # The file is closed when the block exits; the payload is fully encoded
    # inside the block so nothing reads from the file afterwards.
    with h5py.File(hdf5_path, 'r') as hdf5_file:
        two_d_properties = dict((prop, hdf5_file[prop]) for prop in two_d_properties)
        if len(col_idx) == 0 or len(row_idx) == 0:
            # Nothing selected: return empty arrays that keep each property's dtype.
            two_d_result = dict(
                (prop, np.array([], dtype=source.id.dtype))
                for prop, source in two_d_properties.items())
        else:
            two_d_result = select_by_list(two_d_properties, row_idx, col_idx, first_dimension)

        result_set = []
        result_set.extend(('col_' + name, array) for name, array in col_result.items())
        result_set.extend(('row_' + name, array) for name, array in row_result.items())
        result_set.extend(('2D_' + name, array) for name, array in two_d_result.items())
        data = gzip(''.join(arraybuffer.encode_array_set(result_set)))

    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip')]
    start_response(status, response_headers)
    yield data
예제 #2
0
def handler(start_response, request_data):
    """Serve a gzip-compressed 2D data slice together with row/column metadata.

    The column and row index tables of ``datatable`` are queried in the
    database, the requested 2D properties are read from the dataset's HDF5
    file, and everything is encoded into one arraybuffer payload.

    Args:
        start_response: WSGI ``start_response`` callable.
        request_data: Mapping providing ``datatable``, ``dataset``,
            ``2D_properties``, ``col_properties`` and ``row_properties``
            (``~``-separated lists), ``col_qry``, ``col_order``, ``row_qry``,
            ``row_order`` and ``first_dimension``. It is also passed to
            ``DQXDbTools.ParseCredentialInfo``.

    Yields:
        The gzip-compressed response body (a single chunk).

    Raises:
        KeyError: If one of the required keys is missing from ``request_data``.
    """
    datatable = request_data['datatable']
    dataset = request_data['dataset']
    two_d_properties = request_data['2D_properties'].split('~')
    col_properties = request_data['col_properties'].split('~')
    row_properties = request_data['row_properties'].split('~')
    col_qry = request_data['col_qry']
    col_order = request_data['col_order']
    row_qry = request_data['row_qry']
    row_order = request_data['row_order']
    first_dimension = request_data['first_dimension']

    # The index columns are fetched together with the requested properties and
    # stripped from the metadata again before the response is built.
    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)

    db = DQXDbTools.OpenDatabase(DQXDbTools.ParseCredentialInfo(request_data),
                                 dataset)
    try:
        col_table, row_table = get_table_names(db, datatable)
        col_result = index_table_query(db, col_table, col_properties, col_qry,
                                       col_order)
        row_result = index_table_query(db, row_table, row_properties, row_qry,
                                       row_order)
    finally:
        # Release the connection even when one of the queries fails.
        db.close()

    col_idx = col_result.pop(col_index_field)
    row_idx = row_result.pop(row_index_field)

    hdf5_path = os.path.join(config.BASEDIR, '2D_data',
                             dataset + '_' + datatable + '.hdf5')
    # The file is closed when the block exits; the payload is fully encoded
    # inside the block so nothing reads from the file afterwards.
    with h5py.File(hdf5_path, 'r') as hdf5_file:
        two_d_properties = dict(
            (prop, hdf5_file[prop]) for prop in two_d_properties)
        if len(col_idx) == 0 or len(row_idx) == 0:
            # Nothing selected: return empty arrays that keep each property's dtype.
            two_d_result = dict(
                (prop, np.array([], dtype=source.id.dtype))
                for prop, source in two_d_properties.items())
        else:
            two_d_result = select_by_list(two_d_properties, row_idx, col_idx,
                                          first_dimension)

        result_set = []
        result_set.extend(
            ('col_' + name, array) for name, array in col_result.items())
        result_set.extend(
            ('row_' + name, array) for name, array in row_result.items())
        result_set.extend(
            ('2D_' + name, array) for name, array in two_d_result.items())
        data = gzip(''.join(arraybuffer.encode_array_set(result_set)))

    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip')]
    start_response(status, response_headers)
    yield data
예제 #3
0
def handler(start_response, requestData):
    """Run the table query described by the JSON request body and yield it gzipped.

    The body read from ``wsgi.input`` must be a JSON object with the keys
    ``database``, ``table``, ``query`` and ``columns`` (a JSON-encoded list).
    Optional keys are ``orderBy`` (JSON-encoded list of ``[direction, column]``
    pairs), ``distinct`` (``'true'``/``'false'``), ``groupBy``
    (``~``-separated), ``limit`` (``'start~end'``), ``randomSample``,
    ``cache`` and ``joins`` (JSON-encoded list).

    Encoded results are cached under a key built from the query parameters.
    Random-sample requests are neither served from nor written to the cache:
    the key does not contain the sample size, so storing a sampled result
    would hand that subset to later non-sampled requests.

    Args:
        start_response: WSGI ``start_response`` callable.
        requestData: Mapping whose ``'environ'`` entry is the WSGI environ; it
            is also passed to the credential and cursor helpers.

    Yields:
        The gzip-compressed, arraybuffer-encoded result (a single chunk).

    Raises:
        SyntaxError: If no parameters are supplied, a join type is not one of
            the accepted values, or an order direction is not ASC/DESC.
    """
    environ = requestData['environ']
    try:
        length = int(environ.get('CONTENT_LENGTH', '0'))
    except ValueError:
        length = 0
    body = environ['wsgi.input'].read(length).decode("utf-8")
    content = json.loads(body) if body else None
    if not content:
        raise SyntaxError('No query parameters supplied')
    database = content['database']

    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(requestData)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(database))

    tableId = content['table']
    query = content['query']
    orderBy = json.loads(content.get('orderBy', '[]'))
    distinct = content.get('distinct', 'false') == 'true'
    rawColumns = json.loads(content['columns'])
    columns = list(map(decode, rawColumns))
    groupBy = content.get('groupBy', None)
    startRow, endRow = None, None
    if content.get('limit', False):
        startRow, endRow = content['limit'].split('~')
        # Clamp to a non-negative start and a window that ends after it.
        startRow = max(int(startRow), 0)
        endRow = int(endRow)
        if endRow <= startRow:
            endRow = startRow + 1
    randomSample = None
    if content.get('randomSample', False):
        randomSample = int(content['randomSample'])
    cacheData = content.get('cache', True)
    joins = json.loads(content.get('joins', '[]'))

    auth_query = credentials.get_auth_query(
        database, [join['foreignTable'] for join in joins] + [tableId])

    cache = getCache()
    cacheKey = json.dumps([
        tableId, query, orderBy, distinct, columns, groupBy, database,
        startRow, endRow, joins, auth_query
    ])
    # A sampled result must not be exchanged with the cache (see docstring).
    useCache = bool(cacheData) and randomSample is None
    data = None
    if useCache:
        try:
            data = cache[cacheKey]
        except KeyError:
            pass

    if data is None:
        with DQXDbTools.DBCursor(requestData,
                                 database,
                                 read_timeout=config.TIMEOUT) as cur:

            whereClause = DQXDbTools.WhereClause()
            whereClause.ParameterPlaceHolder = '%s'
            whereClause.Decode(query, True)
            if auth_query:
                # AND the caller's query with the authorisation restriction.
                whereClause.query = {
                    "whcClass": "compound",
                    "isCompound": True,
                    "isRoot": True,
                    "Components": [whereClause.query, auth_query],
                    "Tpe": "AND"
                }
            whereClause.CreateSelectStatement()

            sqlQuery = "SELECT "
            if distinct:
                sqlQuery += " DISTINCT "
            sqlQuery += "{0} FROM {1}".format(','.join(columns),
                                              DBTBESC(tableId))
            for join in joins:
                if join.get('type') not in ('', 'INNER', 'LEFT', 'RIGHT', 'FULL'):
                    raise SyntaxError('Join type not valid')
                sqlQuery += " {0} JOIN {1} ON {2} = {3}".format(
                    join['type'].upper(), DBTBESC(join['foreignTable']),
                    DBCOLESC(join['foreignColumn']),
                    DBCOLESC(join['column']))
            if len(whereClause.querystring_params) > 0:
                sqlQuery += " WHERE {0}".format(whereClause.querystring_params)
            if groupBy:
                sqlQuery += " GROUP BY " + ','.join(
                    map(DBCOLESC, groupBy.split('~')))
            if len(orderBy) > 0:
                orderTerms = []
                for direction, col in orderBy:
                    # The direction is spliced into the SQL text verbatim, so
                    # only the two SQL keywords are accepted.
                    if str(direction).upper() not in ('ASC', 'DESC'):
                        raise SyntaxError('Order direction not valid')
                    orderTerms.append(DBCOLESC(col) + ' ' + direction)
                sqlQuery += " ORDER BY {0}".format(','.join(orderTerms))
            if startRow is not None and endRow is not None:
                # The row count includes both startRow and endRow.
                sqlQuery += " LIMIT {0} OFFSET {1}".format(
                    endRow - startRow + 1, startRow)
            if randomSample is not None:
                sqlQuery += " SAMPLE {0}".format(randomSample)

            if DQXDbTools.LogRequests:
                DQXUtils.LogServer('###QRY:' + sqlQuery)
                DQXUtils.LogServer('###PARAMS:' + str(whereClause.queryparams))
            cur.execute(sqlQuery, whereClause.queryparams)
            rows = cur.fetchall()
            result = {}
            for i, (rawCol, desc) in enumerate(zip(rawColumns, cur.description)):
                # Figure out the name we should return for the column - by default monet doesn't qualify names
                col_name = name(rawCol, desc[0])
                dtype = desciptionToDType(desc[1])
                if dtype in ('i1', 'i2', 'i4', 'S'):
                    # These dtypes cannot hold None, so NULLs are replaced by a
                    # per-dtype sentinel; string cells are encoded as UTF-8 bytes.
                    null_value = NULL_VALUES[dtype]
                    values = []
                    for row in rows:
                        cell = row[i]
                        if cell is None:
                            values.append(null_value)
                        elif dtype == 'S':
                            values.append(str(cell).encode('utf-8'))
                        else:
                            values.append(cell)
                    result[col_name] = np.array(values, dtype=dtype)
                else:
                    result[col_name] = np.array([row[i] for row in rows],
                                                dtype=dtype)
            data = gzip(data=b''.join(
                arraybuffer.encode_array_set(list(result.items()))))
            if useCache:
                cache[cacheKey] = data
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')]
    start_response(status, response_headers)
    yield data
예제 #4
0
def handler(start_response, request_data):
    """Serve a gzip-compressed window of 2D data with its row/column metadata.

    Column and row index tables are queried (with the caller's authorisation
    restriction applied), optionally the rows are re-ordered by the 2D values
    of selected columns, and the requested 2D properties are extracted for the
    resulting row/column indices. The encoded payload is cached under a key
    built from all request parameters and both auth queries.

    Args:
        start_response: WSGI ``start_response`` callable.
        request_data: Mapping providing ``table``, ``dataset``,
            ``2DProperties``, ``colProperties``, ``rowProperties``
            (``~``-separated lists), ``colQry``, ``colOrder`` and ``rowQry``.
            Optional keys: ``rowOrder``, ``rowSortCols``, ``colLimit``,
            ``rowLimit``, ``colOffset``, ``rowOffset``, ``colFailLimit``,
            ``rowSortProperty``, ``colKey``, ``sortMode`` and
            ``rowRandomSample``.

    Yields:
        The gzip-compressed, arraybuffer-encoded result (a single chunk). When
        the column query returns more than ``colFailLimit`` columns the
        payload only holds an ``_over_col_limit`` marker plus the row
        metadata.

    Raises:
        KeyError: If a required key is missing from ``request_data``.
    """
    datatable = request_data['table']
    dataset = request_data['dataset']

    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(request_data)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(dataset))

    def optional_int(key):
        """Return ``int(request_data[key])``, or None when the key is absent."""
        try:
            return int(request_data[key])
        except KeyError:
            return None

    two_d_properties = request_data['2DProperties'].split('~')
    col_properties = request_data['colProperties'].split('~')
    row_properties = request_data['rowProperties'].split('~')
    col_qry = request_data['colQry']
    col_order = request_data['colOrder']
    row_qry = request_data['rowQry']
    row_order = request_data.get('rowOrder', None)
    row_order_columns = []
    if row_order == 'columns':
        # Rows are ordered by 2D data below instead of by the database.
        try:
            row_order_columns = request_data['rowSortCols'].split('~')
        except KeyError:
            pass
        row_order = None
    col_limit = optional_int('colLimit')
    row_limit = optional_int('rowLimit')
    col_offset = optional_int('colOffset')
    row_offset = optional_int('rowOffset')
    # Set fail limit to one past so we know if we hit it
    col_fail_limit = optional_int('colFailLimit')
    if col_fail_limit is not None:
        col_fail_limit += 1
    row_sort_property = request_data.get('rowSortProperty')
    col_key = request_data.get('colKey')
    sort_mode = request_data.get('sortMode')
    row_random_sample = optional_int('rowRandomSample')

    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)

    with DQXDbTools.DBCursor(request_data,
                             dataset,
                             read_timeout=config.TIMEOUT) as cur:
        col_tablename, row_tablename = get_table_ids(cur, dataset, datatable)

    col_auth_query = credentials.get_auth_query(dataset, [col_tablename])
    row_auth_query = credentials.get_auth_query(dataset, [row_tablename])

    cache = getCache()
    cache_key = json.dumps([
        datatable, dataset, two_d_properties, col_properties, row_properties,
        col_qry, col_order, row_qry, row_order, row_order_columns,
        row_random_sample, col_limit, row_limit, col_offset, row_offset,
        col_fail_limit, row_sort_property, col_key, sort_mode, col_auth_query,
        row_auth_query
    ])
    try:
        data = cache[cache_key]
    except KeyError:
        print('2D Cache miss')
        data = None

    if data is None:
        sort_by_2d = len(row_order_columns) > 0
        with DQXDbTools.DBCursor(request_data,
                                 dataset,
                                 read_timeout=config.TIMEOUT) as cur:
            col_result = index_table_query(dataset, cur, col_tablename,
                                           col_properties, col_qry,
                                           col_auth_query, col_order,
                                           col_limit, col_offset,
                                           col_fail_limit, col_index_field,
                                           None)

            if sort_by_2d:
                # If we are sorting by 2d data then we need to grab all the rows as the limit applies post sort.
                query_row_limit, query_row_offset = None, None
            else:
                query_row_limit, query_row_offset = row_limit, row_offset
            row_result = index_table_query(dataset, cur, row_tablename,
                                           row_properties, row_qry,
                                           row_auth_query, row_order,
                                           query_row_limit, query_row_offset,
                                           None, row_index_field,
                                           row_random_sample)

            col_idx = col_result.pop(col_index_field)
            row_idx = row_result.pop(row_index_field)
            if len(col_idx) == col_fail_limit:
                result_set = [('_over_col_limit', np.array([0], dtype='i1'))]
                for name, array in list(row_result.items()):
                    result_set.append((('row_' + name), array))
            else:
                if sort_by_2d and len(row_idx) > 0:
                    # Translate primkeys to idx
                    sqlquery = 'SELECT "{col_field}", "{idx_field}" FROM "{table}" WHERE "{col_field}" IN ({params})'.format(
                        idx_field=DQXDbTools.ToSafeIdentifier(col_index_field),
                        table=DQXDbTools.ToSafeIdentifier(col_tablename),
                        params="'" + "','".join(
                            map(DQXDbTools.ToSafeIdentifier,
                                row_order_columns)) + "'",
                        col_field=DQXDbTools.ToSafeIdentifier(col_key))
                    # The statement has to be run before its rows can be
                    # fetched; without this the fetch reads from whatever the
                    # cursor executed last.
                    cur.execute(sqlquery)
                    idx_for_col = dict((k, v) for k, v in cur.fetchall())
                    # Sort by the order specified - reverse so last clicked is major sort
                    sort_col_idx = list(
                        reversed(
                            [idx_for_col[key] for key in row_order_columns]))
                    # Grab the data needed to sort
                    sort_data = extract2D(dataset, datatable, row_idx,
                                          sort_col_idx, [row_sort_property])
                    rows = list(zip(row_idx, sort_data[row_sort_property]))
                    if sort_mode == 'call':
                        if len(rows[0][1].shape) == 1:
                            # One value per column: compare zero-padded values.
                            def call_key(row):
                                return ''.join(
                                    [str(c).zfill(2) for c in row[1]])
                        else:
                            # Several values per column: summarise each first.
                            def call_key(row):
                                return ''.join(
                                    summarise_call(calls) for calls in row[1])

                        rows.sort(key=call_key, reverse=True)
                    elif sort_mode == 'fraction':
                        # One stable sort per sort column, applied in sequence.
                        for i in range(len(sort_col_idx)):
                            # TODO Should be some fancy bayesian shizzle
                            def key_func(row, i=i):
                                total = sum(row[1][i])
                                if total == 0:
                                    return '-1'
                                return (str(1 - float(row[1][i][0]) / total) +
                                        str(total).zfill(4))

                            rows.sort(key=key_func, reverse=True)
                    else:
                        print("Unimplemented sort_mode")
                    row_pos_for_idx = dict(
                        (idx, pos) for pos, idx in enumerate(row_idx))
                    # Now just get the row_idx to pass to 2d extract for the slice we need.
                    # map() is not sliceable on Python 3, so build a list, and
                    # tolerate a missing offset/limit (both are optional).
                    sorted_row_idx = [row[0] for row in rows]
                    first = row_offset or 0
                    last = None if row_limit is None else first + row_limit
                    row_idx = np.array(sorted_row_idx[first:last])
                    # Use this row idx to retrieve the row data from the initial query
                    for name, array in list(row_result.items()):
                        row_result[name] = array[[
                            row_pos_for_idx[idx] for idx in row_idx
                        ]]

                two_d_result = extract2D(dataset, datatable, row_idx, col_idx,
                                         two_d_properties)

                result_set = []
                for name, array in list(col_result.items()):
                    result_set.append((('col_' + name), array))
                for name, array in list(row_result.items()):
                    result_set.append((('row_' + name), array))
                for name, array in list(two_d_result.items()):
                    result_set.append((('2D_' + name), array))
        data = gzip(data=b''.join(arraybuffer.encode_array_set(result_set)))
        cache[cache_key] = data
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')]
    start_response(status, response_headers)
    yield data
예제 #5
0
파일: query.py 프로젝트: cggh/panoptes
def handler(start_response, requestData):
    """Run the table query described by the JSON request body and yield it gzipped.

    The body read from ``wsgi.input`` must be a JSON object with the keys
    ``database``, ``table``, ``query`` and ``columns`` (a JSON-encoded list).
    Optional keys are ``orderBy`` (JSON-encoded list of ``[direction, column]``
    pairs), ``distinct`` (``'true'``/``'false'``), ``groupBy``
    (``~``-separated), ``limit`` (``'start~end'``), ``randomSample``,
    ``cache`` and ``joins`` (JSON-encoded list).

    Encoded results are cached under a key built from the query parameters.
    Random-sample requests are neither served from nor written to the cache:
    the key does not contain the sample size, so storing a sampled result
    would hand that subset to later non-sampled requests.

    Args:
        start_response: WSGI ``start_response`` callable.
        requestData: Mapping whose ``'environ'`` entry is the WSGI environ; it
            is also passed to the credential and cursor helpers.

    Yields:
        The gzip-compressed, arraybuffer-encoded result (a single chunk).

    Raises:
        SyntaxError: If no parameters are supplied, a join type is not one of
            the accepted values, or an order direction is not ASC/DESC.
    """
    environ = requestData['environ']
    try:
        length = int(environ.get('CONTENT_LENGTH', '0'))
    except ValueError:
        length = 0
    body = environ['wsgi.input'].read(length).decode("utf-8")
    content = json.loads(body) if body else None
    if not content:
        raise SyntaxError('No query parameters supplied')
    database = content['database']

    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(requestData)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(database))

    tableId = content['table']
    query = content['query']
    orderBy = json.loads(content.get('orderBy', '[]'))
    distinct = content.get('distinct', 'false') == 'true'
    rawColumns = json.loads(content['columns'])
    columns = list(map(decode, rawColumns))
    groupBy = content.get('groupBy', None)
    startRow, endRow = None, None
    if content.get('limit', False):
        startRow, endRow = content['limit'].split('~')
        # Clamp to a non-negative start and a window that ends after it.
        startRow = max(int(startRow), 0)
        endRow = int(endRow)
        if endRow <= startRow:
            endRow = startRow + 1
    randomSample = None
    if content.get('randomSample', False):
        randomSample = int(content['randomSample'])
    cacheData = content.get('cache', True)
    joins = json.loads(content.get('joins', '[]'))

    auth_query = credentials.get_auth_query(database, [join['foreignTable'] for join in joins] + [tableId])

    cache = getCache()
    cacheKey = json.dumps([tableId, query, orderBy, distinct, columns, groupBy,
                           database, startRow, endRow, joins, auth_query])
    # A sampled result must not be exchanged with the cache (see docstring).
    useCache = bool(cacheData) and randomSample is None
    data = None
    if useCache:
        try:
            data = cache[cacheKey]
        except KeyError:
            pass

    if data is None:
        with DQXDbTools.DBCursor(requestData, database, read_timeout=config.TIMEOUT) as cur:

            whereClause = DQXDbTools.WhereClause()
            whereClause.ParameterPlaceHolder = '%s'
            whereClause.Decode(query, True)
            if auth_query:
                # AND the caller's query with the authorisation restriction.
                whereClause.query = {
                    "whcClass": "compound",
                    "isCompound": True,
                    "isRoot": True,
                    "Components": [
                        whereClause.query,
                        auth_query
                    ],
                    "Tpe": "AND"
                }
            whereClause.CreateSelectStatement()

            sqlQuery = "SELECT "
            if distinct:
                sqlQuery += " DISTINCT "
            sqlQuery += "{0} FROM {1}".format(','.join(columns), DBTBESC(tableId))
            for join in joins:
                if join.get('type') not in ('', 'INNER', 'LEFT', 'RIGHT', 'FULL'):
                    raise SyntaxError('Join type not valid')
                sqlQuery += " {0} JOIN {1} ON {2} = {3}".format(join['type'].upper(), DBTBESC(join['foreignTable']),
                                                                DBCOLESC(join['foreignColumn']),
                                                                DBCOLESC(join['column']))
            if len(whereClause.querystring_params) > 0:
                sqlQuery += " WHERE {0}".format(whereClause.querystring_params)
            if groupBy:
                sqlQuery += " GROUP BY " + ','.join(map(DBCOLESC, groupBy.split('~')))
            if len(orderBy) > 0:
                orderTerms = []
                for direction, col in orderBy:
                    # The direction is spliced into the SQL text verbatim, so
                    # only the two SQL keywords are accepted.
                    if str(direction).upper() not in ('ASC', 'DESC'):
                        raise SyntaxError('Order direction not valid')
                    orderTerms.append(DBCOLESC(col) + ' ' + direction)
                sqlQuery += " ORDER BY {0}".format(','.join(orderTerms))
            if startRow is not None and endRow is not None:
                # The row count includes both startRow and endRow.
                sqlQuery += " LIMIT {0} OFFSET {1}".format(endRow - startRow + 1, startRow)
            if randomSample is not None:
                sqlQuery += " SAMPLE {0}".format(randomSample)

            if DQXDbTools.LogRequests:
                DQXUtils.LogServer('###QRY:' + sqlQuery)
                DQXUtils.LogServer('###PARAMS:' + str(whereClause.queryparams))
            cur.execute(sqlQuery, whereClause.queryparams)
            rows = cur.fetchall()
            result = {}
            for i, (rawCol, desc) in enumerate(zip(rawColumns, cur.description)):
                # Figure out the name we should return for the column - by default monet doesn't qualify names
                col_name = name(rawCol, desc[0])
                dtype = desciptionToDType(desc[1])
                if dtype in ('i1', 'i2', 'i4', 'S'):
                    # These dtypes cannot hold None, so NULLs are replaced by a
                    # per-dtype sentinel; string cells are encoded as ASCII with
                    # unencodable characters replaced.
                    null_value = NULL_VALUES[dtype]
                    values = []
                    for row in rows:
                        cell = row[i]
                        if cell is None:
                            values.append(null_value)
                        elif dtype == 'S':
                            values.append(cell.encode('ascii', 'replace'))
                        else:
                            values.append(cell)
                    result[col_name] = np.array(values, dtype=dtype)
                elif desc[1] == 'timestamp':
                    # Timestamps are converted with datetimeToJulianDay; NULLs stay None.
                    result[col_name] = np.array(
                        [datetimeToJulianDay(row[i]) if row[i] is not None else None for row in rows], dtype=dtype)
                else:
                    result[col_name] = np.array([row[i] for row in rows], dtype=dtype)
            data = gzip(data=b''.join(arraybuffer.encode_array_set(list(result.items()))))
            if useCache:
                cache[cacheKey] = data
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')
                        ]
    start_response(status, response_headers)
    yield data
예제 #6
0
def handler(start_response, request_data):
    """Serve a gzip-compressed window of 2D data with its row/column metadata.

    Required keys of ``request_data``: ``datatable``, ``dataset``,
    ``workspace``, ``2D_properties``, ``col_properties``, ``row_properties``
    (``~``-separated lists), ``col_qry``, ``col_order``, ``row_qry``,
    ``row_order`` and ``first_dimension``. Optional keys: ``row_sort_cols``,
    ``col_limit``, ``row_limit``, ``col_offset``, ``row_offset``,
    ``col_fail_limit``, ``row_sort_property``, ``col_key`` and ``sort_mode``.

    Yields the encoded payload as a single chunk. When the column query
    returns more than ``col_fail_limit`` columns the payload only contains an
    ``_over_col_limit`` marker.
    """

    def optional(key, convert=None):
        # Value stored under `key` (optionally converted), or None if absent.
        try:
            raw = request_data[key]
        except KeyError:
            return None
        if convert is None:
            return raw
        return convert(raw)

    datatable = request_data['datatable']
    dataset = request_data['dataset']
    workspace = request_data['workspace']
    two_d_properties = request_data['2D_properties'].split('~')
    col_properties = request_data['col_properties'].split('~')
    row_properties = request_data['row_properties'].split('~')
    col_qry = request_data['col_qry']
    col_order = request_data['col_order']
    row_qry = request_data['row_qry']
    row_order = request_data['row_order']
    row_order_columns = []
    if row_order == 'columns':
        sort_cols = optional('row_sort_cols')
        if sort_cols is not None:
            row_order_columns = sort_cols.split('~')
        row_order = 'NULL'
    first_dimension = request_data['first_dimension']
    col_limit = optional('col_limit', int)
    row_limit = optional('row_limit', int)
    col_offset = optional('col_offset', int)
    row_offset = optional('row_offset', int)
    # The fail limit is set one past the requested value so hitting it is detectable.
    col_fail_limit = optional('col_fail_limit', lambda raw: int(raw) + 1)
    row_sort_property = optional('row_sort_property')
    col_key = optional('col_key')
    sort_mode = optional('sort_mode')

    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)

    sort_by_2d = len(row_order_columns) > 0

    with DQXDbTools.DBCursor(request_data, dataset, read_timeout=config.TIMEOUT) as cur:
        col_tableid, row_tableid = get_table_ids(cur, datatable)
        col_tablename = get_workspace_table_name(col_tableid, workspace)
        row_tablename = get_workspace_table_name(row_tableid, workspace)

        col_result = index_table_query(cur, col_tablename, col_properties,
                                       col_qry, col_order, col_limit,
                                       col_offset, col_fail_limit,
                                       col_index_field)

        # When sorting by 2D data every row is needed, because the limit is
        # applied after the sort.
        if sort_by_2d:
            query_limit, query_offset = None, None
        else:
            query_limit, query_offset = row_limit, row_offset
        row_result = index_table_query(cur, row_tablename, row_properties,
                                       row_qry, row_order, query_limit,
                                       query_offset, None, row_index_field)

        col_idx = col_result[col_index_field]
        row_idx = row_result[row_index_field]
        if len(col_idx) == col_fail_limit:
            result_set = [('_over_col_limit', np.array([0], dtype='i1'))]
        else:
            del col_result[col_index_field]
            del row_result[row_index_field]

            if sort_by_2d and len(row_idx) > 0:
                # Look up the column index of every sort column key.
                quoted_keys = "'" + "','".join(
                    map(DQXDbTools.ToSafeIdentifier, row_order_columns)) + "'"
                sqlquery = "SELECT {col_field}, {idx_field} FROM {table} WHERE {col_field} IN ({params})".format(
                    idx_field=DQXDbTools.ToSafeIdentifier(col_index_field),
                    table=DQXDbTools.ToSafeIdentifier(col_tablename),
                    params=quoted_keys,
                    col_field=DQXDbTools.ToSafeIdentifier(col_key))
                print(sqlquery)
                cur.execute(sqlquery)
                idx_for_col = dict((key, idx) for key, idx in cur.fetchall())
                # Reversed so that the last clicked column is the major sort.
                sort_col_idx = [idx_for_col[key] for key in row_order_columns][::-1]
                # Fetch only the data needed for sorting.
                sort_data = extract2D(dataset, datatable, row_idx, sort_col_idx,
                                      first_dimension, [row_sort_property])
                rows = list(zip(row_idx, sort_data[row_sort_property]))
                if sort_mode == 'call':
                    if len(rows[0][1].shape) == 1:
                        def call_key(row):
                            return ''.join([str(c).zfill(2) for c in row[1]])
                    else:
                        def call_key(row):
                            return ''.join(summarise_call(calls) for calls in row[1])
                    rows.sort(key=call_key, reverse=True)
                elif sort_mode == 'fraction':
                    for i in range(len(sort_col_idx)):
                        # TODO: should be something Bayesian
                        def key_func(row):
                            if sum(row[1][i]) == 0:
                                return '-1'
                            fraction = 1 - float(row[1][i][0]) / sum(row[1][i])
                            return str(fraction) + str(sum(row[1][i])).zfill(4)
                        rows.sort(key=key_func, reverse=True)
                else:
                    print("Unimplemented sort_mode")
                row_pos_for_idx = dict(
                    (idx, pos) for pos, idx in enumerate(row_idx))
                # Keep only the requested window of the sorted row indices.
                sorted_idx = [entry[0] for entry in rows]
                row_idx = np.array(sorted_idx[row_offset:row_offset + row_limit])
                # Re-select the row metadata of the initial query in the new order.
                positions = [row_pos_for_idx[idx] for idx in row_idx]
                for name in list(row_result.keys()):
                    row_result[name] = row_result[name][positions]

            two_d_result = extract2D(dataset, datatable, row_idx, col_idx,
                                     first_dimension, two_d_properties)

            result_set = []
            for prefix, arrays in (('col_', col_result),
                                   ('row_', row_result),
                                   ('2D_', two_d_result)):
                for name, array in arrays.items():
                    result_set.append((prefix + name, array))
    data = gzip(data=''.join(arraybuffer.encode_array_set(result_set)))
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip')]
    start_response(status, response_headers)
    yield data
예제 #7
0
파일: 2d_query.py 프로젝트: cggh/panoptes
def _optional_int_param(request_data, key, increment=0):
    """Return ``int(request_data[key]) + increment``, or None if *key* is absent.

    Only a missing key maps to None; a present but non-numeric value still
    raises ValueError.
    """
    try:
        return int(request_data[key]) + increment
    except KeyError:
        return None


def handler(start_response, request_data):
    """Serve a 2D-table query as a gzip-compressed encoded array set.

    Reads the query description from ``request_data`` (table, dataset,
    property lists separated by ``~``, queries, ordering, limits, offsets
    and optional sort settings), runs the column and row index queries,
    extracts the requested 2D properties and yields the encoded, gzipped
    payload. The compressed payload is stored in the cache under a key built
    from every request parameter plus the auth queries, and is reused on a
    later identical request.

    When ``rowOrder`` is ``'columns'`` the rows are sorted by the 2D data of
    the columns listed in ``rowSortCols``; in that case all matching rows
    are fetched and ``rowOffset``/``rowLimit`` are applied after the sort.

    Args:
        start_response: WSGI-style callable invoked with the status string
            and the list of response headers.
        request_data: mapping of request parameters (must support ``[]``
            and ``.get``).

    Yields:
        The gzip-compressed response body (a single chunk).
    """
    datatable = request_data['table']
    dataset = request_data['dataset']

    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(request_data)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(dataset))

    two_d_properties = request_data['2DProperties'].split('~')
    col_properties = request_data['colProperties'].split('~')
    row_properties = request_data['rowProperties'].split('~')
    col_qry = request_data['colQry']
    col_order = request_data['colOrder']
    row_qry = request_data['rowQry']
    row_order = request_data.get('rowOrder', None)
    row_order_columns = []
    if row_order == 'columns':
        # Sorting by 2D data replaces the SQL ordering of the row query.
        sort_cols = request_data.get('rowSortCols')
        if sort_cols is not None:
            row_order_columns = sort_cols.split('~')
        row_order = None

    col_limit = _optional_int_param(request_data, 'colLimit')
    row_limit = _optional_int_param(request_data, 'rowLimit')
    col_offset = _optional_int_param(request_data, 'colOffset')
    row_offset = _optional_int_param(request_data, 'rowOffset')
    # Set fail limit to one past so we know if we hit it
    col_fail_limit = _optional_int_param(request_data, 'colFailLimit', increment=1)
    row_sort_property = request_data.get('rowSortProperty')
    col_key = request_data.get('colKey')
    sort_mode = request_data.get('sortMode')
    row_random_sample = _optional_int_param(request_data, 'rowRandomSample')

    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)

    with DQXDbTools.DBCursor(request_data, dataset, read_timeout=config.TIMEOUT) as cur:
        col_tablename, row_tablename = get_table_ids(cur, dataset, datatable)

    col_auth_query = credentials.get_auth_query(dataset, [col_tablename])
    row_auth_query = credentials.get_auth_query(dataset, [row_tablename])

    cache = getCache()
    # The auth queries are part of the key so users with different
    # permissions never share a cached response.
    cache_key = json.dumps([datatable, dataset, two_d_properties, col_properties, row_properties, col_qry, col_order,
                            row_qry, row_order, row_order_columns, row_random_sample, col_limit, row_limit, col_offset,
                            row_offset, col_fail_limit, row_sort_property, col_key, sort_mode, col_auth_query,
                            row_auth_query])
    data = None
    try:
        data = cache[cache_key]
    except KeyError:
        print('2D Cache miss')

    if data is None:
        sort_by_2d = len(row_order_columns) > 0
        with DQXDbTools.DBCursor(request_data, dataset, read_timeout=config.TIMEOUT) as cur:
            col_result = index_table_query(dataset,
                                           cur,
                                           col_tablename,
                                           col_properties,
                                           col_qry,
                                           col_auth_query,
                                           col_order,
                                           col_limit,
                                           col_offset,
                                           col_fail_limit,
                                           col_index_field,
                                           None)

            # If we are sorting by 2d data then we need to grab all the rows
            # as the limit applies post sort.
            row_result = index_table_query(dataset,
                                           cur,
                                           row_tablename,
                                           row_properties,
                                           row_qry,
                                           row_auth_query,
                                           row_order,
                                           None if sort_by_2d else row_limit,
                                           None if sort_by_2d else row_offset,
                                           None,
                                           row_index_field,
                                           row_random_sample)

            col_idx = col_result[col_index_field]
            row_idx = row_result[row_index_field]
            del col_result[col_index_field]
            del row_result[row_index_field]
            if len(col_idx) == col_fail_limit:
                # Too many columns matched: send a marker plus the row data only.
                result_set = [('_over_col_limit', np.array([0], dtype='i1'))]
                for name, array in row_result.items():
                    result_set.append(('row_' + name, array))
            else:
                if sort_by_2d and len(row_idx) > 0:
                    # Translate primkeys to idx
                    sqlquery = 'SELECT "{col_field}", "{idx_field}" FROM "{table}" WHERE "{col_field}" IN ({params})'.format(
                        idx_field=DQXDbTools.ToSafeIdentifier(col_index_field),
                        table=DQXDbTools.ToSafeIdentifier(col_tablename),
                        params="'"+"','".join(map(DQXDbTools.ToSafeIdentifier, row_order_columns))+"'",
                        col_field=DQXDbTools.ToSafeIdentifier(col_key))
                    # The query must actually be run before fetching, otherwise
                    # fetchall() returns leftovers of the previous statement.
                    cur.execute(sqlquery)
                    idx_for_col = dict((k, v) for k, v in cur.fetchall())
                    # Sort by the order specified - reverse so last clicked is major sort
                    sort_col_idx = list(reversed([idx_for_col[key] for key in row_order_columns]))
                    # grab the data needed to sort
                    sort_data = extract2D(dataset, datatable, row_idx, sort_col_idx, [row_sort_property])
                    rows = list(zip(row_idx, sort_data[row_sort_property]))
                    if sort_mode == 'call':
                        polyploid_key_func = lambda row: ''.join(summarise_call(calls) for calls in row[1])
                        haploid_key_func = lambda row: ''.join([str(c).zfill(2) for c in row[1]])
                        # One dimension per row means a single value per column.
                        if len(rows[0][1].shape) == 1:
                            rows.sort(key=haploid_key_func, reverse=True)
                        else:
                            rows.sort(key=polyploid_key_func, reverse=True)
                    elif sort_mode == 'fraction':
                        # One stable sort per column; the closure over ``i`` is
                        # safe because each sort runs before ``i`` advances.
                        for i in range(len(sort_col_idx)):
                            # TODO Should be some fancy bayesian shizzle
                            def key_func(row):
                                if sum(row[1][i]) == 0:
                                    return '-1'
                                return str(1-float(row[1][i][0])/sum(row[1][i]))+str(sum(row[1][i])).zfill(4)
                            rows.sort(key=key_func, reverse=True)
                    else:
                        print("Unimplemented sort_mode")
                    row_pos_for_idx = {idx: pos for pos, idx in enumerate(row_idx)}
                    # Now just get the row_idx to pass to 2d extract for the slice we need.
                    # A missing offset means "from the start" and a missing
                    # limit means "to the end".
                    start = row_offset or 0
                    stop = None if row_limit is None else start + row_limit
                    sorted_row_idx = [row[0] for row in rows]
                    row_idx = np.array(sorted_row_idx[start:stop])
                    # Use this row idx to retrieve the row data from the initial query
                    positions = [row_pos_for_idx[idx] for idx in row_idx]
                    for name, array in list(row_result.items()):
                        row_result[name] = array[positions]

                two_d_result = extract2D(dataset, datatable, row_idx, col_idx, two_d_properties)

                result_set = []
                for name, array in col_result.items():
                    result_set.append(('col_' + name, array))
                for name, array in row_result.items():
                    result_set.append(('row_' + name, array))
                for name, array in two_d_result.items():
                    result_set.append(('2D_' + name, array))
        data = gzip(data=b''.join(arraybuffer.encode_array_set(result_set)))
        cache[cache_key] = data
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')
                        ]
    start_response(status, response_headers)
    yield data