def get_chromsizes(tileset):
    """Get a set of chromsizes matching the coordSystem of this tileset.

    Parameters
    ----------
    tileset: tm.Tileset
        A tileset Django model object.

    Returns
    -------
    chromsizes: [[chrom, size]] or None
        A set of chromsizes to be used with this bigWig file. None if no
        chromsizes tileset with this coordSystem exists, or if more than
        one exists with this coordSystem.
    """
    # Bug fix: the original tested `len(tileset.coordSystem) == None`, which
    # is always False, so an empty coordSystem fell through to the DB query.
    if tileset.coordSystem is None or len(tileset.coordSystem) == 0:
        return None

    try:
        chrom_info_tileset = tm.Tileset.objects.get(
            coordSystem=tileset.coordSystem, datatype='chromsizes'
        )
    except Exception:
        # Covers both Tileset.DoesNotExist (no chromsizes for this
        # coordSystem) and MultipleObjectsReturned (ambiguous match).
        return None

    return tcs.get_tsv_chromsizes(chrom_info_tileset.datafile.path)
def tileset_info(request):
    """Get information about a tileset.

    Tilesets have information critical to their display such as the maximum
    number of dimensions as well as their width. This needs to be relayed to
    the client in order for it to know which tiles to request.

    Args:
        request (django.http.HTTPRequest): The request object containing
            tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object containing the tileset
            meta-information.
    """
    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    chromsizes_error = None

    if "cs" in request.GET:
        # we need to call a different server to get the tiles
        # Bug fix: the original tested `"ci" in request.GET.getlist`, i.e.
        # membership on a bound method, which raises TypeError at runtime.
        if "ci" not in request.GET:
            chromsizes_error = "cs param present without ci"

        # call the request server and get the chromsizes
        pass
    else:
        if "ci" in request.GET:
            try:
                chromsizes = tm.Tileset.objects.get(uuid=request.GET["ci"])
                data = tcs.chromsizes_array_to_series(
                    tcs.get_tsv_chromsizes(chromsizes.datafile.path)
                )
            except Exception:
                # Best-effort lookup: a missing or unreadable chromsizes
                # tileset is deliberately ignored.
                pass

    for tileset_uuid in tileset_uuids:
        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        # Special-cased built-in tileset: fixed OSM image pyramid bounds.
        if tileset_uuid == "osm-image":
            tileset_infos[tileset_uuid] = {
                "min_x": 0,
                "max_height": 134217728,
                "min_y": 0,
                "max_y": 134217728,
                "max_zoom": 19,
                "tile_size": 256,
            }
            continue

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                "error": "No such tileset with uid: {}".format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {"error": "Forbidden"}
            continue

        if tileset_object.requiresAuthentication and not request.user.is_authenticated:
            # dataset requires a logged-in user
            tileset_infos[tileset_uuid] = {
                "error": "This request required authentication"
            }
            continue

        if tileset_object.requiresAuthentication and (
            ("accessibleTilesets" not in request.session)
            or (tileset_uuid not in request.session["accessibleTilesets"])
        ):
            # dataset is not accessible for this user
            tileset_infos[tileset_uuid] = {
                "error": "You don't have access to this tileset"
            }
            continue

        if tileset_object.filetype == "hitile" or tileset_object.filetype == "hibed":
            # Renamed from `tileset_info` to avoid shadowing this function.
            ts_info = hdft.get_tileset_info(
                h5py.File(tileset_object.datafile.path, "r")
            )
            tileset_infos[tileset_uuid] = {
                "min_pos": [int(ts_info["min_pos"])],
                "max_pos": [int(ts_info["max_pos"])],
                # Smallest power of two covering the full data extent.
                "max_width": 2
                ** math.ceil(
                    math.log(ts_info["max_pos"] - ts_info["min_pos"])
                    / math.log(2)
                ),
                "tile_size": int(ts_info["tile_size"]),
                "max_zoom": int(ts_info["max_zoom"]),
            }
        elif tileset_object.filetype in ("bigwig", "bigbed"):
            # Both filetypes share an identical tileset_info pathway
            # (previously duplicated verbatim in two branches).
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(tileset_object.datafile.path, chromsizes)
            if "chromsizes" in tsinfo:
                # Sizes may be numpy ints; make them JSON-serializable.
                tsinfo["chromsizes"] = [
                    (c, int(s)) for c, s in tsinfo["chromsizes"]
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif tileset_object.filetype == "multivec":
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "elastic_search":
            # NOTE(review): `urllib.urlopen` is Python 2 style — presumably a
            # legacy branch; confirm against this module's imports.
            response = urllib.urlopen(tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif tileset_object.filetype == "beddb":
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "bed2ddb":
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "cooler":
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "time-interval-json":
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif (
            tileset_object.filetype == "2dannodb"
            or tileset_object.filetype == "imtiles"
        ):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "geodb":
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "bam":
            tileset_infos[tileset_uuid] = ctb.tileset_info(
                tileset_object.datafile.path
            )
            tileset_infos[tileset_uuid]["max_tile_width"] = hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                "error": "Unknown filetype " + tileset_object.filetype
            }

        # Attach metadata common to every successfully-resolved tileset.
        tileset_infos[tileset_uuid]["name"] = tileset_object.name
        tileset_infos[tileset_uuid]["datatype"] = tileset_object.datatype
        tileset_infos[tileset_uuid]["coordSystem"] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]["coordSystem2"] = tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
def sizes(request):
    """Return chromosome sizes.

    Retrieves the chromSizes.tsv and either returns it as is or converts
    it to a JSON format.

    Args:
        request: HTTP GET request object. The request can feature the
            following queries:
            id: id of the stored chromSizes [e.g.: hg19 or mm9]
            type: return data format [tsv or json]
            cum: return cumulative size or offset [0 or 1]

    Returns:
        A HTTP text or JSON response depending on the GET request.

        A text response looks like this:
        ```
        chr1    1
        chr2    2
        ...
        ```

        A JSON response looks like this:
        ```
        {
            chr1: {size: 1, offset: 0},
            chr2: {size: 2, offset: 1},
            ...
        }
        ```
    """
    uuid = request.GET.get("id", False)
    res_type = request.GET.get("type", "tsv")
    # Bug fix: the original used the raw query value, so `cum=0` (the string
    # "0") was truthy and returned cumulative sizes despite the documented
    # [0 or 1] contract. Absent or "0"/"false" now means no cumulation.
    incl_cum = request.GET.get("cum", "0") not in ("0", "", "false", "False")

    response = HttpResponse
    is_json = False

    if res_type == "json":
        is_json = True
        response = JsonResponse

    if res_type != "json" and incl_cum:
        return response(
            "Sorry buddy. Cumulative sizes not yet supported for non-JSON "
            "file types. 😞",
            status=501,
        )

    # Try to find the db entry
    try:
        chrom_sizes = tm.Tileset.objects.get(uuid=uuid)
    except Exception as e:
        logger.exception(e)
        err_msg = "Oh lord! ChromSizes for %s not found. 😬" % uuid
        err_status = 404
        if is_json:
            return response({"error": err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    # Try to load the chromosome sizes and return them as a list of
    # (name, size) tuples
    try:
        # Hoisted: the filetype lookup was previously repeated per branch.
        filetype = tgt.get_tileset_filetype(chrom_sizes)
        if filetype == "bigwig":
            data = hgbi.chromsizes(chrom_sizes.datafile.path)
        elif filetype == "bigbed":
            data = hgbb.chromsizes(chrom_sizes.datafile.path)
        elif filetype == "cooler":
            data = tcs.get_cooler_chromsizes(chrom_sizes.datafile.path)
        elif filetype == "chromsizes-tsv":
            data = tcs.get_tsv_chromsizes(chrom_sizes.datafile.path)
        elif filetype == "multivec":
            data = tcs.get_multivec_chromsizes(chrom_sizes.datafile.path)
        else:
            # Unknown filetype: fall through to an empty body.
            data = ""
    except Exception as ex:
        logger.exception(ex)
        err_msg = str(ex)
        err_status = 500
        if is_json:
            return response({"error": err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    # Convert the stuff if needed
    try:
        # data comes in as a list of (name, size) tuples and is converted
        # to a more appropriate data type going out
        if res_type == "tsv":
            lines = []
            for (name, size) in data:
                lines += ["{}\t{}\n".format(name, size)]
            data = lines

        if res_type == "json" and not incl_cum:
            json_out = {}
            for row in data:
                json_out[row[0]] = {"size": int(row[1])}
            data = json_out

        if res_type == "json" and incl_cum:
            json_out = {}
            cum = 0
            for row in data:
                size = int(row[1])
                json_out[row[0]] = {"size": size, "offset": cum}
                cum += size
            data = json_out
    except Exception as e:
        logger.exception(e)
        err_msg = "THIS IS AN OUTRAGE!!!1! Something failed. 😡"
        err_status = 500
        if is_json:
            return response({"error": err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    return response(data)
def check_for_chromsizes(filename, coord_system):
    '''
    Check to see if we have chromsizes matching the coord system
    of the filename.

    Parameters
    ----------
    filename: string
        The name of the bigwig file
    coord_system: string
        The coordinate system (assembly) of this bigwig file

    Returns
    -------
    string
        The coordSystem of the single matching chromsizes tileset.

    Raises
    ------
    CommandError
        If no chromsizes overlap the file's chromosomes, if more than one
        coordSystem matches, or if the match contradicts `coord_system`.
    '''
    tileset_info = hgbi.tileset_info(filename)
    # Normalize to strings so they compare equal to the TSV-derived sets.
    tsinfo_chromsizes = set(
        (str(chrom), str(size))
        for chrom, size in tileset_info['chromsizes'])

    chrom_info_tileset = None

    # check if we have a chrom sizes tileset that matches the coordsystem
    # of the input file
    if coord_system is not None and len(coord_system) > 0:
        try:
            chrom_info_tileset = tm.Tileset.objects.filter(
                coordSystem=coord_system, datatype='chromsizes')

            if len(chrom_info_tileset) > 1:
                # Bug fix: the two message fragments previously concatenated
                # without a separating space ("chromSizesfor").
                raise CommandError(
                    "More than one available set of chromSizes "
                    "for this coordSystem ({})".format(coord_system))

            chrom_info_tileset = chrom_info_tileset.first()
        except dce.ObjectDoesNotExist:
            # NOTE(review): .filter() never raises ObjectDoesNotExist;
            # kept for safety, but .first() already yields None on no match.
            chrom_info_tileset = None

    matches = []

    if chrom_info_tileset is None:
        # we haven't found chromsizes matching the coordsystem
        # go through every chromsizes file and see if we have a match
        for chrom_info_tileset in tm.Tileset.objects.filter(
                datatype='chromsizes'):
            chromsizes_set = set(
                tuple(t)
                for t in tcs.get_tsv_chromsizes(
                    chrom_info_tileset.datafile.path))

            matches += [(len(chromsizes_set & tsinfo_chromsizes),
                         chrom_info_tileset)]
    else:
        # a set of chromsizes was provided
        chromsizes_set = set(
            tuple(t)
            for t in tcs.get_tsv_chromsizes(chrom_info_tileset.datafile.path))

        matches += [(len(chromsizes_set & tsinfo_chromsizes),
                     chrom_info_tileset)]

    # matches that overlap some chromsizes with the bigwig file
    overlap_matches = [m for m in matches if m[0] > 0]

    if len(overlap_matches) == 0:
        # Bug fix: fragments previously ran together ("bigwigSee ...").
        raise CommandError(
            "No chromsizes available which match the chromosomes in this "
            "bigwig. "
            "See http://docs.higlass.io/data_preparation.html#bigwig-files "
            "for more information")

    if len(overlap_matches) > 1:
        raise CommandError(
            "Multiple matching coordSystems: "
            "See http://docs.higlass.io/data_preparation.html#bigwig-files "
            "for more information",
            ["({} [{}])".format(t[1].coordSystem, t[0])
             for t in overlap_matches])

    if (coord_system is not None and len(coord_system) > 0
            and overlap_matches[0][1].coordSystem != coord_system):
        # Bug fix: .format() previously bound only to the last concatenated
        # literal, so the {} placeholders were never filled in; also fixed
        # the "sytem" typo in the message.
        raise CommandError(
            "Matching chromosome sizes (coordSystem: {}) do not "
            "match the specified coordinate system ({}). "
            "Either omit the coordSystem or specify a matching one. "
            "See http://docs.higlass.io/data_preparation.html#bigwig-files "
            "for more information".format(
                overlap_matches[0][1].coordSystem, coord_system))

    if (coord_system is not None and len(coord_system) > 0
            and overlap_matches[0][1].coordSystem == coord_system):
        print("Using coordinates for coordinate system: {}".format(
            coord_system))

    if coord_system is None or len(coord_system) == 0:
        print("No coordinate system specified, but we found matching "
              "chromsizes. Using coordinate system {}.".format(
                  overlap_matches[0][1].coordSystem))

    return overlap_matches[0][1].coordSystem
def tileset_info(request):
    '''
    Get information about a tileset.

    Tilesets have information critical to their display such as the maximum
    number of dimensions as well as their width. This needs to be relayed to
    the client in order for it to know which tiles to request.

    Args:
        request (django.http.HTTPRequest): The request object containing
            tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object containing the tileset
            meta-information.
    '''
    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    chromsizes_error = None

    if 'cs' in request.GET:
        # we need to call a different server to get the tiles
        # Bug fix: the original tested `'ci' in request.GET.getlist`, i.e.
        # membership on a bound method, which raises TypeError at runtime.
        if 'ci' not in request.GET:
            chromsizes_error = 'cs param present without ci'

        # call the request server and get the chromsizes
        pass
    else:
        if 'ci' in request.GET:
            try:
                chromsizes = tm.Tileset.objects.get(uuid=request.GET['ci'])
                data = tcs.chromsizes_array_to_series(
                    tcs.get_tsv_chromsizes(chromsizes.datafile.path))
            except Exception:
                # Best-effort lookup: a missing or unreadable chromsizes
                # tileset is deliberately ignored.
                pass

    for tileset_uuid in tileset_uuids:
        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        # Special-cased built-in tileset: fixed OSM image pyramid bounds.
        if tileset_uuid == 'osm-image':
            tileset_infos[tileset_uuid] = {
                'min_x': 0,
                'max_height': 134217728,
                'min_y': 0,
                'max_y': 134217728,
                'max_zoom': 19,
                'tile_size': 256
            }
            continue

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                'error': 'No such tileset with uid: {}'.format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {'error': "Forbidden"}
            continue

        if (
            tileset_object.filetype == 'hitile'
            or tileset_object.filetype == 'hibed'
        ):
            # Renamed from `tileset_info` to avoid shadowing this function.
            ts_info = hdft.get_tileset_info(
                h5py.File(tileset_object.datafile.path, 'r'))
            tileset_infos[tileset_uuid] = {
                "min_pos": [int(ts_info['min_pos'])],
                "max_pos": [int(ts_info['max_pos'])],
                # Smallest power of two covering the full data extent.
                "max_width": 2 ** math.ceil(
                    math.log(
                        ts_info['max_pos'] - ts_info['min_pos']
                    ) / math.log(2)
                ),
                "tile_size": int(ts_info['tile_size']),
                "max_zoom": int(ts_info['max_zoom'])
            }
        elif tileset_object.filetype in ('bigwig', 'bigbed'):
            # Both filetypes share an identical tileset_info pathway
            # (previously duplicated verbatim in two branches).
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(
                tileset_object.datafile.path, chromsizes
            )
            if 'chromsizes' in tsinfo:
                # Sizes may be numpy ints; make them JSON-serializable.
                tsinfo['chromsizes'] = [
                    (c, int(s)) for c, s in tsinfo['chromsizes']
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif tileset_object.filetype == 'multivec':
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path)
        elif tileset_object.filetype == "elastic_search":
            # NOTE(review): `urllib.urlopen` is Python 2 style — presumably a
            # legacy branch; confirm against this module's imports.
            response = urllib.urlopen(
                tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif tileset_object.filetype == 'beddb':
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'bed2ddb':
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'cooler':
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'time-interval-json':
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif (
            tileset_object.filetype == '2dannodb'
            or tileset_object.filetype == 'imtiles'
        ):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'geodb':
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'bam':
            tileset_infos[tileset_uuid] = ctb.tileset_info(
                tileset_object.datafile.path
            )
            tileset_infos[tileset_uuid]['max_tile_width'] = \
                hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                'error': 'Unknown filetype ' + tileset_object.filetype
            }

        # Attach metadata common to every successfully-resolved tileset.
        tileset_infos[tileset_uuid]['name'] = tileset_object.name
        tileset_infos[tileset_uuid]['datatype'] = tileset_object.datatype
        tileset_infos[tileset_uuid]['coordSystem'] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]['coordSystem2'] = \
            tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
def sizes(request):
    '''Return chromosome sizes.

    Retrieves the chromSizes.tsv and either returns it as is or converts
    it to a JSON format.

    Args:
        request: HTTP GET request object. The request can feature the
            following queries:
            id: id of the stored chromSizes [e.g.: hg19 or mm9]
            type: return data format [tsv or json]
            cum: return cumulative size or offset [0 or 1]

    Returns:
        A HTTP text or JSON response depending on the GET request.

        A text response looks like this:
        ```
        chr1    1
        chr2    2
        ...
        ```

        A JSON response looks like this:
        ```
        {
            chr1: {size: 1, offset: 0},
            chr2: {size: 2, offset: 1},
            ...
        }
        ```
    '''
    uuid = request.GET.get('id', False)
    res_type = request.GET.get('type', 'tsv')
    # Bug fix: the original used the raw query value, so `cum=0` (the string
    # "0") was truthy and returned cumulative sizes despite the documented
    # [0 or 1] contract. Absent or "0"/"false" now means no cumulation.
    incl_cum = request.GET.get('cum', '0') not in ('0', '', 'false', 'False')

    response = HttpResponse
    is_json = False

    if res_type == 'json':
        is_json = True
        response = JsonResponse

    if res_type != 'json' and incl_cum:
        return response(
            'Sorry buddy. Cumulative sizes not yet supported for non-JSON '
            'file types. 😞',
            status=501
        )

    # Try to find the db entry
    try:
        chrom_sizes = tm.Tileset.objects.get(uuid=uuid)
    except Exception as e:
        logger.exception(e)
        err_msg = 'Oh lord! ChromSizes for %s not found. 😬' % uuid
        err_status = 404
        if is_json:
            return response({'error': err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    # Try to load the chromosome sizes and return them as a list of
    # (name, size) tuples
    try:
        # Hoisted: the filetype lookup was previously repeated per branch.
        filetype = tgt.get_tileset_filetype(chrom_sizes)
        datapath = tut.get_datapath(chrom_sizes.datafile.url)
        if filetype == 'bigwig':
            data = hgbi.chromsizes(datapath)
        elif filetype == 'cooler':
            data = tcs.get_cooler_chromsizes(datapath)
        elif filetype == 'chromsizes-tsv':
            data = tcs.get_tsv_chromsizes(datapath)
        elif filetype == 'multivec':
            data = tcs.get_multivec_chromsizes(datapath)
        else:
            # Unknown filetype: fall through to an empty body.
            data = ''
    except Exception as ex:
        logger.exception(ex)
        err_msg = str(ex)
        print('err_msg:', err_msg)
        err_status = 500
        if is_json:
            return response({'error': err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    # Convert the stuff if needed
    try:
        # data comes in as a list of (name, size) tuples and is converted
        # to a more appropriate data type going out
        if res_type == 'tsv':
            lines = []
            for (name, size) in data:
                lines += ["{}\t{}\n".format(name, size)]
            data = lines

        if res_type == 'json' and not incl_cum:
            json_out = {}
            for row in data:
                json_out[row[0]] = {'size': int(row[1])}
            data = json_out

        if res_type == 'json' and incl_cum:
            json_out = {}
            cum = 0
            for row in data:
                size = int(row[1])
                json_out[row[0]] = {'size': size, 'offset': cum}
                cum += size
            data = json_out
    except Exception as e:
        logger.exception(e)
        err_msg = 'THIS IS AN OUTRAGE!!!1! Something failed. 😡'
        err_status = 500
        if is_json:
            return response({'error': err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    return response(data)