コード例 #1
0
ファイル: 2d_query.py プロジェクト: pombredanne/panoptes
def handler(start_response, request_data):
    """Serve a gzip-compressed slice of 2D data plus its row/column properties.

    Reads the following keys from ``request_data``:

    * ``datatable``, ``dataset`` -- identify the index tables and the HDF5
      file ``<BASEDIR>/2D_data/<dataset>_<datatable>.hdf5``.
    * ``2D_properties``, ``col_properties``, ``row_properties`` --
      ``~``-separated property names.
    * ``col_qry``, ``col_order``, ``row_qry``, ``row_order`` -- passed
      through to ``index_table_query``.
    * ``first_dimension`` -- passed through to ``select_by_list``.

    The response body is the result of ``arraybuffer.encode_array_set`` on
    a list of ``(name, array)`` pairs, gzip-compressed; names are prefixed
    with ``col_``, ``row_`` or ``2D_`` according to their origin.

    This is a generator: it calls ``start_response`` and then yields the
    complete body as a single chunk.

    Raises:
        KeyError: if a required key is missing from ``request_data``.
    """
    datatable = request_data['datatable']
    dataset = request_data['dataset']
    two_d_properties = request_data['2D_properties'].split('~')
    col_properties = request_data['col_properties'].split('~')
    row_properties = request_data['row_properties'].split('~')
    col_qry = request_data['col_qry']
    col_order = request_data['col_order']
    row_qry = request_data['row_qry']
    row_order = request_data['row_order']
    first_dimension = request_data['first_dimension']

    # The index columns are always fetched in addition to the requested
    # properties; they are stripped from the results again further down.
    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)

    db = DQXDbTools.OpenDatabase(DQXDbTools.ParseCredentialInfo(request_data), dataset)
    try:
        col_table, row_table = get_table_names(db, datatable)
        col_result = index_table_query(db,
                                       col_table,
                                       col_properties,
                                       col_qry,
                                       col_order)
        row_result = index_table_query(db,
                                       row_table,
                                       row_properties,
                                       row_qry,
                                       row_order)
    finally:
        # Release the connection even when a query fails.
        db.close()

    col_idx = col_result[col_index_field]
    row_idx = row_result[row_index_field]
    del col_result[col_index_field]
    del row_result[row_index_field]

    hdf5_path = os.path.join(config.BASEDIR, '2D_data', dataset + '_' + datatable + '.hdf5')
    hdf5_file = h5py.File(hdf5_path, 'r')
    try:
        datasets = dict((prop, hdf5_file[prop]) for prop in two_d_properties)
        # Explicit len() checks: the index columns may be arrays, whose
        # truth value is not a reliable emptiness test.
        if len(col_idx) == 0 or len(row_idx) == 0:
            # Nothing selected: return empty arrays of the matching dtype
            # instead of querying the HDF5 datasets.
            two_d_result = dict((prop, np.array([], dtype=hdf5_dataset.id.dtype))
                                for prop, hdf5_dataset in datasets.items())
        else:
            two_d_result = select_by_list(datasets, row_idx, col_idx, first_dimension)

        result_set = []
        for name, array in col_result.items():
            result_set.append(('col_' + name, array))
        for name, array in row_result.items():
            result_set.append(('row_' + name, array))
        for name, array in two_d_result.items():
            result_set.append(('2D_' + name, array))
        # Encode while the file is still open, in case any of the 2D
        # results still refer to data backed by it.
        data = gzip(''.join(arraybuffer.encode_array_set(result_set)))
    finally:
        # Previously the file handle was never closed (one leak per request).
        hdf5_file.close()

    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip')]
    start_response(status, response_headers)
    yield data
コード例 #2
0
ファイル: 2d_query.py プロジェクト: TAlexPerkins/panoptes
def handler(start_response, request_data):
    """Answer a 2D query with a gzip-compressed, array-encoded payload.

    ``request_data`` must provide ``datatable``, ``dataset``,
    ``2D_properties``, ``col_properties``, ``row_properties`` (the three
    property lists are ``~``-separated strings), ``col_qry``, ``col_order``,
    ``row_qry``, ``row_order`` and ``first_dimension``.

    The column and row index tables are queried through
    ``index_table_query``; the resulting index columns are then used to
    select from the datasets of
    ``<BASEDIR>/2D_data/<dataset>_<datatable>.hdf5``. All arrays are
    returned as ``(name, array)`` pairs whose names are prefixed with
    ``col_``, ``row_`` or ``2D_``, encoded with
    ``arraybuffer.encode_array_set`` and gzip-compressed.

    This is a generator: ``start_response`` is called once and the whole
    body is yielded as one chunk.

    Raises:
        KeyError: if a required key is missing from ``request_data``.
    """
    datatable = request_data['datatable']
    dataset = request_data['dataset']
    two_d_properties = request_data['2D_properties'].split('~')
    col_properties = request_data['col_properties'].split('~')
    row_properties = request_data['row_properties'].split('~')
    col_qry = request_data['col_qry']
    col_order = request_data['col_order']
    row_qry = request_data['row_qry']
    row_order = request_data['row_order']
    first_dimension = request_data['first_dimension']

    # Index columns are requested alongside the caller's properties and
    # removed from the returned property sets below.
    col_index_name = datatable + '_column_index'
    row_index_name = datatable + '_row_index'
    col_properties.append(col_index_name)
    row_properties.append(row_index_name)

    db = DQXDbTools.OpenDatabase(DQXDbTools.ParseCredentialInfo(request_data),
                                 dataset)
    try:
        col_table, row_table = get_table_names(db, datatable)
        col_result = index_table_query(db, col_table, col_properties, col_qry,
                                       col_order)
        row_result = index_table_query(db, row_table, row_properties, row_qry,
                                       row_order)
    finally:
        # Always give the connection back, including on query failure.
        db.close()

    col_idx = col_result[col_index_name]
    row_idx = row_result[row_index_name]
    del col_result[col_index_name]
    del row_result[row_index_name]

    hdf5_file = h5py.File(
        os.path.join(config.BASEDIR, '2D_data',
                     dataset + '_' + datatable + '.hdf5'), 'r')
    try:
        hdf5_datasets = dict(
            (prop, hdf5_file[prop]) for prop in two_d_properties)
        # len() is used on purpose: the index columns may be arrays, for
        # which plain truthiness is not a safe emptiness check.
        if len(col_idx) == 0 or len(row_idx) == 0:
            # Empty selection: emit empty arrays that keep each dataset's
            # dtype rather than touching the HDF5 data.
            two_d_result = {}
            for prop, hdf5_dataset in hdf5_datasets.items():
                two_d_result[prop] = np.array(
                    [], dtype=hdf5_dataset.id.dtype)
        else:
            two_d_result = select_by_list(hdf5_datasets, row_idx, col_idx,
                                          first_dimension)

        result_set = [('col_' + name, array)
                      for name, array in col_result.items()]
        result_set.extend(('row_' + name, array)
                          for name, array in row_result.items())
        result_set.extend(('2D_' + name, array)
                          for name, array in two_d_result.items())
        # The payload is built before the file is closed so that nothing
        # backed by the file is read after close().
        data = gzip(''.join(arraybuffer.encode_array_set(result_set)))
    finally:
        # The original code never closed this handle.
        hdf5_file.close()

    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip')]
    start_response(status, response_headers)
    yield data
コード例 #3
0
ファイル: 2d_query.py プロジェクト: gitter-badger/panoptes
def _optional_param(request_data, key, convert=None):
    """Return ``request_data[key]`` (passed through ``convert`` if given), or None when absent."""
    try:
        value = request_data[key]
    except KeyError:
        return None
    if convert is None:
        return value
    return convert(value)


def _sort_rows_by_2d_data(rows, sort_mode, sort_col_count):
    """Sort ``rows`` -- a non-empty list of ``(row_idx, sort_values)`` pairs -- in place, descending."""
    if sort_mode == 'call':
        if len(rows[0][1].shape) == 1:
            # One value per sort column ("haploid" in the original naming):
            # zero-pad each value so the joined string orders consistently.
            key_func = lambda row: ''.join(str(c).zfill(2) for c in row[1])
        else:
            # Several values per sort column ("polyploid" in the original
            # naming): summarise each group of calls first.
            key_func = lambda row: ''.join(summarise_call(calls) for calls in row[1])
        rows.sort(key=key_func, reverse=True)
    elif sort_mode == 'fraction':
        # One stable sort per sort column; the last column sorted ends up
        # as the major key.
        for i in range(sort_col_count):
            #TODO Should be some fancy bayesian shizzle
            def key_func(row):
                total = sum(row[1][i])
                if total == 0:
                    return '-1'
                return str(1 - float(row[1][i][0]) / total) + str(total).zfill(4)
            rows.sort(key=key_func, reverse=True)
    else:
        print("Unimplemented sort_mode")


def handler(start_response, request_data):
    """Serve a gzip-compressed slice of 2D data, optionally sorted by 2D columns.

    Required keys in ``request_data``: ``datatable``, ``dataset``,
    ``workspace``, ``2D_properties``, ``col_properties``,
    ``row_properties`` (``~``-separated lists), ``col_qry``, ``col_order``,
    ``row_qry``, ``row_order`` and ``first_dimension``.

    Optional keys: ``col_limit``, ``row_limit``, ``col_offset``,
    ``row_offset``, ``col_fail_limit`` (integers), ``row_sort_property``,
    ``col_key``, ``sort_mode`` and -- when ``row_order`` is ``'columns'`` --
    ``row_sort_cols`` (``~``-separated column keys to sort the rows by).

    If the column query returns more than ``col_fail_limit`` columns the
    response contains only a single ``_over_col_limit`` array. Otherwise it
    contains the column properties (``col_`` prefix), row properties
    (``row_`` prefix) and 2D data (``2D_`` prefix), encoded with
    ``arraybuffer.encode_array_set`` and gzip-compressed.

    This is a generator: ``start_response`` is called once and the whole
    body is yielded as one chunk.

    Raises:
        KeyError: if a required key is missing from ``request_data``.
        ValueError: if an integer parameter cannot be parsed by ``int``.
    """
    datatable = request_data['datatable']
    dataset = request_data['dataset']
    workspace = request_data['workspace']
    two_d_properties = request_data['2D_properties'].split('~')
    col_properties = request_data['col_properties'].split('~')
    row_properties = request_data['row_properties'].split('~')
    col_qry = request_data['col_qry']
    col_order = request_data['col_order']
    row_qry = request_data['row_qry']
    row_order = request_data['row_order']
    row_order_columns = []
    if row_order == 'columns':
        try:
            row_order_columns = request_data['row_sort_cols'].split('~')
        except KeyError:
            pass
        # Sorting by 2D columns is done after the query, so the query
        # itself gets the literal order 'NULL'.
        row_order = 'NULL'
    first_dimension = request_data['first_dimension']
    col_limit = _optional_param(request_data, 'col_limit', int)
    row_limit = _optional_param(request_data, 'row_limit', int)
    col_offset = _optional_param(request_data, 'col_offset', int)
    row_offset = _optional_param(request_data, 'row_offset', int)
    #Set fail limit to one past so we know if we hit it
    col_fail_limit = _optional_param(request_data, 'col_fail_limit',
                                     lambda value: int(value) + 1)
    row_sort_property = _optional_param(request_data, 'row_sort_property')
    col_key = _optional_param(request_data, 'col_key')
    sort_mode = _optional_param(request_data, 'sort_mode')

    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)

    sort_by_2d = len(row_order_columns) > 0

    with DQXDbTools.DBCursor(request_data, dataset, read_timeout=config.TIMEOUT) as cur:
        col_tableid, row_tableid = get_table_ids(cur, datatable)
        col_tablename = get_workspace_table_name(col_tableid, workspace)
        row_tablename = get_workspace_table_name(row_tableid, workspace)

        col_result = index_table_query(cur,
                                       col_tablename,
                                       col_properties,
                                       col_qry,
                                       col_order,
                                       col_limit,
                                       col_offset,
                                       col_fail_limit,
                                       col_index_field)

        #If we are sorting by 2d data then we need to grab all the rows as the limit applies post sort.
        row_result = index_table_query(cur,
                                       row_tablename,
                                       row_properties,
                                       row_qry,
                                       row_order,
                                       None if sort_by_2d else row_limit,
                                       None if sort_by_2d else row_offset,
                                       None,
                                       row_index_field)

        col_idx = col_result[col_index_field]
        row_idx = row_result[row_index_field]
        if len(col_idx) == col_fail_limit:
            result_set = [('_over_col_limit', np.array([0], dtype='i1'))]
        else:
            del col_result[col_index_field]
            del row_result[row_index_field]

            if sort_by_2d and len(row_idx) > 0:
                #Translate primkeys to idx
                # NOTE(review): the IN-list values are embedded in the SQL
                # text after DQXDbTools.ToSafeIdentifier rather than passed
                # as bound parameters -- confirm that this is sufficient
                # for untrusted input.
                sqlquery = "SELECT {col_field}, {idx_field} FROM {table} WHERE {col_field} IN ({params})".format(
                    idx_field=DQXDbTools.ToSafeIdentifier(col_index_field),
                    table=DQXDbTools.ToSafeIdentifier(col_tablename),
                    params="'"+"','".join(map(DQXDbTools.ToSafeIdentifier, row_order_columns))+"'",
                    col_field=DQXDbTools.ToSafeIdentifier(col_key))
                print(sqlquery)
                cur.execute(sqlquery)
                idx_for_col = dict((k, v) for k, v in cur.fetchall())
                #Sort by the order specified - reverse so last clicked is major sort
                sort_col_idx = [idx_for_col[key] for key in reversed(row_order_columns)]
                #grab the data needed to sort
                sort_data = extract2D(dataset, datatable, row_idx, sort_col_idx, first_dimension, [row_sort_property])
                rows = list(zip(row_idx, sort_data[row_sort_property]))
                _sort_rows_by_2d_data(rows, sort_mode, len(sort_col_idx))
                row_pos_for_idx = dict((idx, pos) for pos, idx in enumerate(row_idx))
                #Now just get the row_idx to pass to 2d extract for the slice we need
                # row_offset / row_limit are optional; treat a missing
                # offset as 0 and a missing limit as "no limit" instead of
                # failing on None arithmetic.
                start = row_offset if row_offset is not None else 0
                stop = start + row_limit if row_limit is not None else None
                row_idx = np.array([row[0] for row in rows][start:stop])
                #Use this row idx to retrieve the row data from the initial query
                positions = [row_pos_for_idx[idx] for idx in row_idx]
                for name, array in row_result.items():
                    row_result[name] = array[positions]

            two_d_result = extract2D(dataset, datatable, row_idx, col_idx, first_dimension, two_d_properties)

            result_set = []
            for name, array in col_result.items():
                result_set.append((('col_'+name), array))
            for name, array in row_result.items():
                result_set.append((('row_'+name), array))
            for name, array in two_d_result.items():
                result_set.append((('2D_'+name), array))
    data = gzip(data=''.join(arraybuffer.encode_array_set(result_set)))
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip')]
    start_response(status, response_headers)
    yield data