Example #1
def get_fragments_by_loci(request):
    '''
    Retrieve a list of locations and return the corresponding matrix fragments

    Args:

    request (django.http.HttpRequest): The request object containing the
        list of loci.

    Returns:

    JsonResponse or HttpResponse: The requested fragments as JSON, as
        base64-encoded PNGs, or as a (zipped) PNG image, depending on the
        requested encoding.
    '''

    if isinstance(request.data, str):
        return JsonResponse(
            {
                'error': 'Request body needs to be an array or object.',
                'error_message': 'Request body needs to be an array or object.'
            },
            status=400)

    try:
        loci = request.data.get('loci', [])
    except AttributeError:
        loci = request.data
    except Exception as e:
        return JsonResponse(
            {
                'error': 'Could not read request body.',
                'error_message': str(e)
            },
            status=400)

    try:
        forced_rep_idx = request.data.get('representativeIndices', None)
    except Exception:
        forced_rep_idx = None
    '''
    Loci list must be of type:
    [cooler]          [imtiles]
    0: chrom1         start1
    1: start1         end1
    2: end1           start2
    3: chrom2         end2
    4: start2         dataset
    5: end2           zoomLevel
    6: dataset        dim*
    7: zoomOutLevel
    8: dim*

    *) Optional
    '''
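    # For example (values illustrative, not from a real dataset), a cooler
    # locus might look like
    #   ['chr1', 1000000, 2000000, 'chr1', 1000000, 2000000, '<uuid>', -1, 64]
    # and an imtiles locus like
    #   [1000, 2000, 1000, 2000, 'my.imtiles', 0, 64]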

    params = get_params(request, GET_FRAG_PARAMS)

    dims = params['dims']
    padding = params['padding']
    no_balance = params['no-balance']
    percentile = params['percentile']
    precision = params['precision']
    no_cache = params['no-cache']
    ignore_diags = params['ignore-diags']
    no_normalize = params['no-normalize']
    aggregate = params['aggregate']
    aggregation_method = params['aggregation-method']
    max_previews = params['max-previews']
    encoding = params['encoding']
    representatives = params['representatives']

    # Check if requesting a snippet from a `.cool` cooler file
    is_cool = len(loci) and len(loci[0]) > 7
    tileset_idx = 6 if is_cool else 4
    zoom_level_idx = tileset_idx + 1

    filetype = None
    new_filetype = None
    previews = []
    previews_2d = []
    ts_cache = {}
    mat_idx = None

    total_valid_loci = 0
    loci_lists = {}
    loci_ids = []
    try:
        for locus in loci:
            tileset = None
            tileset_file = ''

            if locus[tileset_idx]:
                if locus[tileset_idx] in ts_cache:
                    tileset = ts_cache[locus[tileset_idx]]['obj']
                    tileset_file = ts_cache[locus[tileset_idx]]['path']
                elif locus[tileset_idx].endswith('.cool'):
                    tileset_file = path.join('data', locus[tileset_idx])
                else:
                    try:
                        tileset = Tileset.objects.get(uuid=locus[tileset_idx])
                        tileset_file = tileset.datafile.path
                        ts_cache[locus[tileset_idx]] = {
                            "obj": tileset,
                            "path": tileset_file
                        }

                    except AttributeError:
                        return JsonResponse(
                            {
                                'error':
                                'Tileset ({}) does not exist'.format(
                                    locus[tileset_idx]),
                            },
                            status=400)
                    except Tileset.DoesNotExist:
                        if locus[tileset_idx].startswith('osm'):
                            new_filetype = locus[tileset_idx]
                        else:
                            return JsonResponse(
                                {
                                    'error':
                                    'Tileset ({}) does not exist'.format(
                                        locus[tileset_idx]),
                                },
                                status=400)
            else:
                return JsonResponse(
                    {
                        'error': 'Tileset not specified',
                    },
                    status=400)

            # Get the dimensions of the snippets (i.e., width and height in px)
            inset_dim = (locus[zoom_level_idx + 1] if
                         (len(locus) >= zoom_level_idx + 2
                          and locus[zoom_level_idx + 1]) else None)
            out_dim = dims if inset_dim is None else inset_dim

            # Make sure out dim (in pixel) is not too large
            if ((is_cool and out_dim > hss.SNIPPET_MAT_MAX_OUT_DIM) or
                (not is_cool and out_dim > hss.SNIPPET_IMG_MAX_OUT_DIM)):
                return JsonResponse(
                    {
                        'error': 'Snippet too large',
                        'error_message': str(SnippetTooLarge())
                    },
                    status=400)

            if tileset_file not in loci_lists:
                loci_lists[tileset_file] = {}

            if is_cool:
                # Get max abs dim in base pairs
                max_abs_dim = max(locus[2] - locus[1], locus[5] - locus[4])

                with h5py.File(tileset_file, 'r') as f:
                    # get base resolution (bin size) of cooler file
                    if 'resolutions' in f:
                        # v2
                        resolutions = sorted(
                            [int(key) for key in f['resolutions'].keys()])
                        closest_res = 0
                        for i, res in enumerate(resolutions):
                            if (max_abs_dim / out_dim) - res < 0:
                                closest_res = resolutions[max(0, i - 1)]
                                break
                        zoomout_level = (locus[zoom_level_idx]
                                         if locus[zoom_level_idx] >= 0 else
                                         closest_res)
                    else:
                        # v1
                        max_zoom = f.attrs['max-zoom']
                        bin_size = int(f[str(max_zoom)].attrs['bin-size'])

                        # Find closest zoom level if `zoomout_level < 0`
                        # Assuming resolutions of powers of 2
                        zoomout_level = (locus[zoom_level_idx] if
                                         locus[zoom_level_idx] >= 0 else floor(
                                             log((max_abs_dim / bin_size) /
                                                 out_dim, 2)))

            else:
                # Get max abs dim in base pairs
                max_abs_dim = max(locus[1] - locus[0], locus[3] - locus[2])

                bin_size = 1

                # Find closest zoom level if `zoomout_level < 0`
                # Assuming resolutions of powers of 2
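                # e.g., a 4096 px wide locus rendered into a 64 px snippet
                # yields zoomout_level = floor(log2(4096 / 64)) = 6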
                zoomout_level = (locus[zoom_level_idx]
                                 if locus[zoom_level_idx] >= 0 else floor(
                                     log((max_abs_dim / bin_size) /
                                         out_dim, 2)))

            if zoomout_level not in loci_lists[tileset_file]:
                loci_lists[tileset_file][zoomout_level] = []

            locus_id = '.'.join(map(str, locus))

            loci_lists[tileset_file][zoomout_level].append(
                locus[0:tileset_idx] + [total_valid_loci, inset_dim, locus_id])
            loci_ids.append(locus_id)

            if new_filetype is None:
                new_filetype = (tileset.filetype if tileset else
                                tileset_file[tileset_file.rfind('.') + 1:])

            if filetype is None:
                filetype = new_filetype

            if filetype != new_filetype:
                return JsonResponse(
                    {
                        'error':
                        ('Multiple file types per query are not supported yet.'
                         )
                    },
                    status=400)

            total_valid_loci += 1

    except Exception as e:
        return JsonResponse(
            {
                'error': 'Could not convert loci.',
                'error_message': str(e)
            },
            status=500)

    mat_idx = list(range(len(loci_ids)))

    # Get a unique string for caching
    dump = (json.dumps(loci, sort_keys=True) + str(forced_rep_idx) +
            str(dims) + str(padding) + str(no_balance) + str(percentile) +
            str(precision) + str(ignore_diags) + str(no_normalize) +
            str(aggregate) + str(aggregation_method) + str(max_previews) +
            str(encoding) + str(representatives))
    uuid = hashlib.md5(dump.encode('utf-8')).hexdigest()

    # Check if something is cached
    if not no_cache:
        try:
            results = rdb.get('frag_by_loci_%s' % uuid)
            if results:
                return JsonResponse(pickle.loads(results))
        except Exception:
            pass

    matrices = [None] * total_valid_loci
    data_types = [None] * total_valid_loci
    try:
        for dataset in loci_lists:
            for zoomout_level in loci_lists[dataset]:
                if filetype in ('cooler', 'cool'):
                    raw_matrices = get_frag_by_loc_from_cool(
                        dataset,
                        loci_lists[dataset][zoomout_level],
                        dims,
                        zoomout_level=zoomout_level,
                        balanced=not no_balance,
                        padding=int(padding),
                        percentile=percentile,
                        ignore_diags=ignore_diags,
                        no_normalize=no_normalize,
                        aggregate=aggregate,
                    )

                    for i, matrix in enumerate(raw_matrices):
                        idx = loci_lists[dataset][zoomout_level][i][6]
                        matrices[idx] = matrix
                        data_types[idx] = 'matrix'

                if filetype in ('imtiles', 'osm-image'):
                    extractor = (get_frag_by_loc_from_imtiles if filetype
                                 == 'imtiles' else get_frag_by_loc_from_osm)

                    sub_ims = extractor(
                        imtiles_file=dataset,
                        loci=loci_lists[dataset][zoomout_level],
                        zoom_level=zoomout_level,
                        padding=float(padding),
                        no_cache=no_cache,
                    )

                    for i, im in enumerate(sub_ims):
                        idx = loci_lists[dataset][zoomout_level][i][4]

                        matrices[idx] = im

                        data_types[idx] = 'matrix'

    except Exception as ex:
        return JsonResponse(
            {
                'error': 'Could not retrieve fragments.',
                'error_message': str(ex)
            },
            status=500)

    if aggregate and len(matrices) > 1:
        try:
            cover, previews_1d, previews_2d = aggregate_frags(
                matrices,
                loci_ids,
                aggregation_method,
                max_previews,
            )
            matrices = [cover]
            mat_idx = []
            if previews_1d is not None:
                previews = np.split(previews_1d, range(1,
                                                       previews_1d.shape[0]))
            data_types = [data_types[0]]
        except Exception as ex:
            return JsonResponse(
                {
                    'error': 'Could not aggregate fragments.',
                    'error_message': str(ex)
                },
                status=500)

    if representatives and len(matrices) > 1:
        if forced_rep_idx and len(forced_rep_idx) <= len(matrices):
            matrices = [matrices[i] for i in forced_rep_idx]
            mat_idx = forced_rep_idx
            data_types = [data_types[0]] * len(forced_rep_idx)
        else:
            try:
                rep_frags, rep_idx = get_rep_frags(matrices, loci, loci_ids,
                                                   representatives, no_cache)
                matrices = rep_frags
                mat_idx = rep_idx
                data_types = [data_types[0]] * len(rep_frags)
            except Exception as ex:
                return JsonResponse(
                    {
                        'error': 'Could not get representative fragments.',
                        'error_message': str(ex)
                    },
                    status=500)

    if encoding not in ('b64', 'image'):
        # Adjust precision and convert to list
        for i, matrix in enumerate(matrices):
            if precision > 0:
                matrix = np.round(matrix, decimals=precision)
            matrices[i] = matrix.tolist()

        if max_previews > 0:
            for i, preview in enumerate(previews):
                previews[i] = preview.tolist()
            for i, preview_2d in enumerate(previews_2d):
                previews_2d[i] = preview_2d.tolist()

    # Encode matrix if required
    if encoding == 'b64':
        for i, matrix in enumerate(matrices):
            frag_id = loci_ids[mat_idx[i]]
            data_types[i] = 'dataUrl'
            if not no_cache and frag_id:
                mat_b64 = None
                try:
                    mat_b64 = rdb.get('im_b64_%s' % frag_id)
                    if mat_b64 is not None:
                        matrices[i] = mat_b64.decode('ascii')
                        continue
                except Exception:
                    pass

            mat_b64 = pybase64.b64encode(np_to_png(matrix)).decode('ascii')

            if not no_cache:
                try:
                    rdb.set('im_b64_%s' % frag_id, mat_b64, 60 * 30)
                except Exception as ex:
                    # error caching a snippet
                    # log the error and carry on; this isn't critical
                    logger.warning(ex)

            matrices[i] = mat_b64

        if max_previews > 0:
            for i, preview in enumerate(previews):
                previews[i] = pybase64.b64encode(
                    np_to_png(preview)).decode('ascii')
            for i, preview_2d in enumerate(previews_2d):
                previews_2d[i] = pybase64.b64encode(
                    np_to_png(preview_2d)).decode('ascii')

    # Create results
    results = {
        'fragments': matrices,
        'indices': [int(i) for i in mat_idx],
        'dataTypes': data_types,
    }

    # Return Y aggregates as 1D previews on demand
    if max_previews > 0:
        results['previews'] = previews
        results['previews2d'] = previews_2d

    # Cache results for 30 minutes
    try:
        rdb.set('frag_by_loci_%s' % uuid, pickle.dumps(results), 60 * 30)
    except Exception as ex:
        # error caching the results
        # log the error and carry on; this isn't critical
        logger.warning(ex)

    if encoding == 'image':
        if len(matrices) == 1:
            return HttpResponse(np_to_png(
                grey_to_rgb(matrices[0], to_rgba=True)),
                                content_type='image/png')
        else:
            ims = []
            for i, matrix in enumerate(matrices):
                ims.append({
                    'name': '{}.png'.format(i),
                    'bytes': np_to_png(grey_to_rgb(matrix, to_rgba=True))
                })
            return blob_to_zip(ims, to_resp=True)

    return JsonResponse(results)
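
For reference, a minimal sketch of how this view might be exercised from a
test, assuming a Django test client, a URL route of '/fragments_by_loci/',
and a cooler tileset registered under the uuid 'my-cooler-uuid' (route and
uuid are illustrative, not part of the code above):

from django.test import Client
import json

client = Client()
response = client.post(
    '/fragments_by_loci/',
    json.dumps({
        'loci': [
            # chrom1, start1, end1, chrom2, start2, end2,
            # dataset, zoomOutLevel, dim (optional)
            ['chr1', 1000000, 2000000,
             'chr1', 1000000, 2000000,
             'my-cooler-uuid', -1, 64],
        ],
    }),
    content_type='application/json')
fragments = json.loads(response.content)['fragments']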
Example #2
def get_fragments_by_loci(request):
    '''
    Retrieve a list of locations and return the corresponding matrix fragments

    Args:

    request (django.http.HttpRequest): The request object containing the
        list of loci.

    Returns:

    JsonResponse or HttpResponse: The requested fragments as JSON, as
        base64-encoded PNGs, or as a (zipped) PNG image, depending on the
        requested encoding.
    '''

    if isinstance(request.data, str):
        return JsonResponse(
            {
                'error': 'Request body needs to be an array or object.',
                'error_message': 'Request body needs to be an array or object.'
            },
            status=400)

    try:
        loci = request.data.get('loci', [])
    except AttributeError:
        loci = request.data
    except Exception as e:
        return JsonResponse(
            {
                'error': 'Could not read request body.',
                'error_message': str(e)
            },
            status=400)

    try:
        forced_rep_idx = request.data.get('representativeIndices', None)
    except Exception:
        forced_rep_idx = None
    '''
    Loci list must be of type:
    [cooler]          [imtiles]
    0: chrom1         start1
    1: start1         end1
    2: end1           start2
    3: chrom2         end2
    4: start2         dataset
    5: end2           zoomLevel
    6: dataset        dim*
    7: zoomOutLevel
    8: dim*

    *) Optional
    '''
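    # e.g., an imtiles locus (values illustrative):
    #   [1000, 2000, 1000, 2000, 'my.imtiles', 0, 64]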

    params = get_params(request, GET_FRAG_PARAMS)

    dims = params['dims']
    padding = params['padding']
    no_balance = params['no-balance']
    percentile = params['percentile']
    precision = params['precision']
    no_cache = params['no-cache']
    ignore_diags = params['ignore-diags']
    no_normalize = params['no-normalize']
    aggregate = params['aggregate']
    aggregation_method = params['aggregation-method']
    max_previews = params['max-previews']
    encoding = params['encoding']
    representatives = params['representatives']

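    # Cooler loci have more than 7 fields (see the layout above), so the
    # dataset sits at index 6; imtiles loci keep it at index 4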
    tileset_idx = 6 if len(loci) and len(loci[0]) > 7 else 4
    zoom_level_idx = tileset_idx + 1

    filetype = None
    new_filetype = None
    previews = []
    previews_2d = []
    ts_cache = {}
    mat_idx = None

    total_valid_loci = 0
    loci_lists = {}
    loci_ids = []
    try:
        for locus in loci:
            tileset = None
            tileset_file = ''

            if locus[tileset_idx]:
                if locus[tileset_idx] in ts_cache:
                    tileset = ts_cache[locus[tileset_idx]]['obj']
                    tileset_file = ts_cache[locus[tileset_idx]]['path']
                elif locus[tileset_idx].endswith('.cool'):
                    tileset_file = path.join('data', locus[tileset_idx])
                else:
                    try:
                        tileset = Tileset.objects.get(uuid=locus[tileset_idx])
                        tileset_file = get_datapath(tileset.datafile.url)
                        ts_cache[locus[tileset_idx]] = {
                            "obj": tileset,
                            "path": tileset_file
                        }

                    except AttributeError:
                        return JsonResponse(
                            {
                                'error':
                                'Tileset ({}) does not exist'.format(
                                    locus[tileset_idx]),
                            },
                            status=400)
                    except Tileset.DoesNotExist:
                        if locus[tileset_idx].startswith('osm'):
                            new_filetype = locus[tileset_idx]
                        else:
                            return JsonResponse(
                                {
                                    'error':
                                    'Tileset ({}) does not exist'.format(
                                        locus[tileset_idx]),
                                },
                                status=400)
            else:
                return JsonResponse(
                    {
                        'error': 'Tileset not specified',
                    },
                    status=400)

            if tileset_file not in loci_lists:
                loci_lists[tileset_file] = {}

            if locus[zoom_level_idx] not in loci_lists[tileset_file]:
                loci_lists[tileset_file][locus[zoom_level_idx]] = []

            inset_dim = (locus[zoom_level_idx + 1] if
                         (len(locus) >= zoom_level_idx + 2
                          and locus[zoom_level_idx + 1]) else 0)

            locus_id = '.'.join(map(str, locus))

            loci_lists[tileset_file][locus[zoom_level_idx]].append(
                locus[0:tileset_idx] + [total_valid_loci, inset_dim, locus_id])
            loci_ids.append(locus_id)

            if new_filetype is None:
                new_filetype = (tileset.filetype if tileset else
                                tileset_file[tileset_file.rfind('.') + 1:])

            if filetype is None:
                filetype = new_filetype

            if filetype != new_filetype:
                return JsonResponse(
                    {
                        'error':
                        ('Multiple file types per query are not supported yet.'
                         )
                    },
                    status=400)

            total_valid_loci += 1

    except Exception as e:
        return JsonResponse(
            {
                'error': 'Could not convert loci.',
                'error_message': str(e)
            },
            status=500)

    mat_idx = list(range(len(loci_ids)))

    # Get a unique string for caching
    dump = (json.dumps(loci, sort_keys=True) + str(forced_rep_idx) +
            str(dims) + str(padding) + str(no_balance) + str(percentile) +
            str(precision) + str(ignore_diags) + str(no_normalize) +
            str(aggregate) + str(aggregation_method) + str(max_previews) +
            str(encoding) + str(representatives))
    uuid = hashlib.md5(dump.encode('utf-8')).hexdigest()

    # Check if something is cached
    if not no_cache:
        try:
            results = rdb.get('frag_by_loci_%s' % uuid)
            if results:
                return JsonResponse(pickle.loads(results))
        except Exception:
            pass

    matrices = [None] * total_valid_loci
    data_types = [None] * total_valid_loci
    try:
        for dataset in loci_lists:
            for zoomout_level in loci_lists[dataset]:
                if filetype in ('cooler', 'cool'):
                    raw_matrices = get_frag_by_loc_from_cool(
                        dataset,
                        loci_lists[dataset][zoomout_level],
                        dims,
                        zoomout_level=zoomout_level,
                        balanced=not no_balance,
                        padding=int(padding),
                        percentile=percentile,
                        ignore_diags=ignore_diags,
                        no_normalize=no_normalize,
                        aggregate=aggregate,
                    )

                    for i, matrix in enumerate(raw_matrices):
                        idx = loci_lists[dataset][zoomout_level][i][6]
                        matrices[idx] = matrix
                        data_types[idx] = 'matrix'

                if filetype in ('imtiles', 'osm-image'):
                    extractor = (get_frag_by_loc_from_imtiles if filetype
                                 == 'imtiles' else get_frag_by_loc_from_osm)

                    sub_ims = extractor(
                        imtiles_file=dataset,
                        loci=loci_lists[dataset][zoomout_level],
                        zoom_level=zoomout_level,
                        padding=float(padding),
                        no_cache=no_cache,
                    )

                    for i, im in enumerate(sub_ims):
                        idx = loci_lists[dataset][zoomout_level][i][4]

                        matrices[idx] = im

                        data_types[idx] = 'matrix'

    except Exception as ex:
        return JsonResponse(
            {
                'error': 'Could not retrieve fragments.',
                'error_message': str(ex)
            },
            status=500)

    if aggregate and len(matrices) > 1:
        try:
            cover, previews_1d, previews_2d = aggregate_frags(
                matrices,
                loci_ids,
                aggregation_method,
                max_previews,
            )
            matrices = [cover]
            mat_idx = []
            if previews_1d is not None:
                previews = np.split(previews_1d, range(1,
                                                       previews_1d.shape[0]))
            data_types = [data_types[0]]
        except Exception as ex:
            return JsonResponse(
                {
                    'error': 'Could not aggregate fragments.',
                    'error_message': str(ex)
                },
                status=500)

    if representatives and len(matrices) > 1:
        if forced_rep_idx and len(forced_rep_idx) <= len(matrices):
            matrices = [matrices[i] for i in forced_rep_idx]
            mat_idx = forced_rep_idx
            data_types = [data_types[0]] * len(forced_rep_idx)
        else:
            try:
                rep_frags, rep_idx = get_rep_frags(matrices, loci, loci_ids,
                                                   representatives, no_cache)
                matrices = rep_frags
                mat_idx = rep_idx
                data_types = [data_types[0]] * len(rep_frags)
            except Exception as ex:
                return JsonResponse(
                    {
                        'error': 'Could not get representative fragments.',
                        'error_message': str(ex)
                    },
                    status=500)

    if encoding not in ('b64', 'image'):
        # Adjust precision and convert to list
        for i, matrix in enumerate(matrices):
            if precision > 0:
                matrix = np.round(matrix, decimals=precision)
            matrices[i] = matrix.tolist()

        if max_previews > 0:
            for i, preview in enumerate(previews):
                previews[i] = preview.tolist()
            for i, preview_2d in enumerate(previews_2d):
                previews_2d[i] = preview_2d.tolist()

    # Encode matrix if required
    if encoding == 'b64':
        for i, matrix in enumerate(matrices):
            frag_id = loci_ids[mat_idx[i]]
            data_types[i] = 'dataUrl'
            if not no_cache and frag_id:
                mat_b64 = None
                try:
                    mat_b64 = rdb.get('im_b64_%s' % frag_id)
                    if mat_b64 is not None:
                        matrices[i] = mat_b64.decode('ascii')
                        continue
                except Exception:
                    pass

            mat_b64 = pybase64.b64encode(np_to_png(matrix)).decode('ascii')

            if not no_cache:
                rdb.set('im_b64_%s' % frag_id, mat_b64, 60 * 30)

            matrices[i] = mat_b64

        if max_previews > 0:
            for i, preview in enumerate(previews):
                previews[i] = pybase64.b64encode(
                    np_to_png(preview)).decode('ascii')
            for i, preview_2d in enumerate(previews_2d):
                previews_2d[i] = pybase64.b64encode(
                    np_to_png(preview_2d)).decode('ascii')

    # Create results
    results = {
        'fragments': matrices,
        'indices': [int(i) for i in mat_idx],
        'dataTypes': data_types,
    }

    # Return Y aggregates as 1D previews on demand
    if max_previews > 0:
        results['previews'] = previews
        results['previews2d'] = previews_2d

    # Cache results for 30 minutes
    rdb.set('frag_by_loci_%s' % uuid, pickle.dumps(results), 60 * 30)

    if encoding == 'image':
        if len(matrices) == 1:
            return HttpResponse(np_to_png(
                grey_to_rgb(matrices[0], to_rgba=True)),
                                content_type='image/png')
        else:
            ims = []
            for i, matrix in enumerate(matrices):
                ims.append({
                    'name': '{}.png'.format(i),
                    'bytes': np_to_png(grey_to_rgb(matrix, to_rgba=True))
                })
            return blob_to_zip(ims, to_resp=True)

    return JsonResponse(results)
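
When `encoding` is 'b64', each fragment is returned as a base64-encoded PNG
string. A minimal sketch of decoding one on the client side, assuming
`response_json` holds the parsed body of the JsonResponse above:

import base64
import io

from PIL import Image

# response_json: parsed body of the JsonResponse returned above
fragment_b64 = response_json['fragments'][0]
image = Image.open(io.BytesIO(base64.b64decode(fragment_b64)))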