def get_chromsizes(tileset):
    """Get a set of chromsizes matching the coordSystem of this tileset.

    Parameters
    ----------
    tileset: tm.Tileset
        A tileset Django model object.

    Returns
    -------
    chromsizes: [[chrom, size]] or None
        A set of chromsizes to be used with this bigWig file. None if no
        chromsizes tileset with this coordSystem exists, or if more than
        one exists with this coordSystem.
    """
    # Bug fix: the original tested `len(tileset.coordSystem) == None`, which
    # is always False, so an empty coordSystem fell through to the DB query.
    if tileset.coordSystem is None or len(tileset.coordSystem) == 0:
        return None

    try:
        chrom_info_tileset = tm.Tileset.objects.get(
            coordSystem=tileset.coordSystem, datatype='chromsizes'
        )
    except Exception:
        # Covers both Tileset.DoesNotExist (no chromsizes for this
        # coordSystem) and MultipleObjectsReturned (ambiguous match).
        return None

    return tcs.get_tsv_chromsizes(chrom_info_tileset.datafile.path)
def tileset_info(request):
    """Get information about a tileset.

    Tilesets have information critical to their display such as the maximum
    number of dimensions as well as their width. This needs to be relayed to
    the client in order for it to know which tiles to request.

    Args:
        request (django.http.HTTPRequest): The request object containing
            tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object containing the tileset
            meta-information.
    """
    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    chromsizes_error = None

    if "cs" in request.GET:
        # we need to call a different server to get the tiles
        # Bug fix: the original tested `"ci" in request.GET.getlist`, i.e.
        # membership on a bound method, which raises TypeError at runtime.
        if "ci" not in request.GET:
            chromsizes_error = "cs param present without ci"

        # call the request server and get the chromsizes
        pass
    else:
        if "ci" in request.GET:
            try:
                chromsizes = tm.Tileset.objects.get(uuid=request.GET["ci"])
                data = tcs.chromsizes_array_to_series(
                    tcs.get_tsv_chromsizes(chromsizes.datafile.path)
                )
            except Exception:
                # Best-effort lookup: a missing or unreadable chromsizes
                # tileset is deliberately ignored.
                pass

    for tileset_uuid in tileset_uuids:
        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        # Special-cased built-in tileset: fixed OSM image pyramid bounds.
        if tileset_uuid == "osm-image":
            tileset_infos[tileset_uuid] = {
                "min_x": 0,
                "max_height": 134217728,
                "min_y": 0,
                "max_y": 134217728,
                "max_zoom": 19,
                "tile_size": 256,
            }
            continue

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                "error": "No such tileset with uid: {}".format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {"error": "Forbidden"}
            continue

        if tileset_object.requiresAuthentication and not request.user.is_authenticated:
            # dataset requires a logged-in user
            tileset_infos[tileset_uuid] = {
                "error": "This request required authentication"
            }
            continue

        if tileset_object.requiresAuthentication and (
            ("accessibleTilesets" not in request.session)
            or (tileset_uuid not in request.session["accessibleTilesets"])
        ):
            # dataset is not accessible for this user
            tileset_infos[tileset_uuid] = {
                "error": "You don't have access to this tileset"
            }
            continue

        if tileset_object.filetype == "hitile" or tileset_object.filetype == "hibed":
            # Renamed from `tileset_info` to avoid shadowing this function.
            ts_info = hdft.get_tileset_info(
                h5py.File(tileset_object.datafile.path, "r")
            )
            tileset_infos[tileset_uuid] = {
                "min_pos": [int(ts_info["min_pos"])],
                "max_pos": [int(ts_info["max_pos"])],
                # Smallest power of two covering the full data extent.
                "max_width": 2
                ** math.ceil(
                    math.log(ts_info["max_pos"] - ts_info["min_pos"])
                    / math.log(2)
                ),
                "tile_size": int(ts_info["tile_size"]),
                "max_zoom": int(ts_info["max_zoom"]),
            }
        elif tileset_object.filetype in ("bigwig", "bigbed"):
            # Both filetypes share an identical tileset_info pathway
            # (previously duplicated verbatim in two branches).
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(tileset_object.datafile.path, chromsizes)
            if "chromsizes" in tsinfo:
                # Sizes may be numpy ints; make them JSON-serializable.
                tsinfo["chromsizes"] = [
                    (c, int(s)) for c, s in tsinfo["chromsizes"]
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif tileset_object.filetype == "multivec":
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "elastic_search":
            # NOTE(review): `urllib.urlopen` is Python 2 style — presumably a
            # legacy branch; confirm against this module's imports.
            response = urllib.urlopen(tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif tileset_object.filetype == "beddb":
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "bed2ddb":
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "cooler":
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "time-interval-json":
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif (
            tileset_object.filetype == "2dannodb"
            or tileset_object.filetype == "imtiles"
        ):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "geodb":
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == "bam":
            tileset_infos[tileset_uuid] = ctb.tileset_info(
                tileset_object.datafile.path
            )
            tileset_infos[tileset_uuid]["max_tile_width"] = hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                "error": "Unknown filetype " + tileset_object.filetype
            }

        # Attach metadata common to every successfully-resolved tileset.
        tileset_infos[tileset_uuid]["name"] = tileset_object.name
        tileset_infos[tileset_uuid]["datatype"] = tileset_object.datatype
        tileset_infos[tileset_uuid]["coordSystem"] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]["coordSystem2"] = tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
def sizes(request):
    """Return chromosome sizes.

    Retrieves the chromSizes.tsv and either returns it as is or converts
    it to a JSON format.

    Args:
        request: HTTP GET request object. The request can feature the
            following queries:
            id: id of the stored chromSizes [e.g.: hg19 or mm9]
            type: return data format [tsv or json]
            cum: return cumulative size or offset [0 or 1]

    Returns:
        A HTTP text or JSON response depending on the GET request.

        A text response looks like this:
        ```
        chr1    1
        chr2    2
        ...
        ```

        A JSON response looks like this:
        ```
        {
            chr1: {size: 1, offset: 0},
            chr2: {size: 2, offset: 1},
            ...
        }
        ```
    """
    uuid = request.GET.get("id", False)
    res_type = request.GET.get("type", "tsv")
    # Bug fix: the original used the raw query value, so `cum=0` (the string
    # "0") was truthy and returned cumulative sizes despite the documented
    # [0 or 1] contract. Absent or "0"/"false" now means no cumulation.
    incl_cum = request.GET.get("cum", "0") not in ("0", "", "false", "False")

    response = HttpResponse
    is_json = False

    if res_type == "json":
        is_json = True
        response = JsonResponse

    if res_type != "json" and incl_cum:
        return response(
            "Sorry buddy. Cumulative sizes not yet supported for non-JSON "
            "file types. 😞",
            status=501,
        )

    # Try to find the db entry
    try:
        chrom_sizes = tm.Tileset.objects.get(uuid=uuid)
    except Exception as e:
        logger.exception(e)
        err_msg = "Oh lord! ChromSizes for %s not found. 😬" % uuid
        err_status = 404
        if is_json:
            return response({"error": err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    # Try to load the chromosome sizes and return them as a list of
    # (name, size) tuples
    try:
        # Hoisted: the filetype lookup was previously repeated per branch.
        filetype = tgt.get_tileset_filetype(chrom_sizes)
        if filetype == "bigwig":
            data = hgbi.chromsizes(chrom_sizes.datafile.path)
        elif filetype == "bigbed":
            data = hgbb.chromsizes(chrom_sizes.datafile.path)
        elif filetype == "cooler":
            data = tcs.get_cooler_chromsizes(chrom_sizes.datafile.path)
        elif filetype == "chromsizes-tsv":
            data = tcs.get_tsv_chromsizes(chrom_sizes.datafile.path)
        elif filetype == "multivec":
            data = tcs.get_multivec_chromsizes(chrom_sizes.datafile.path)
        else:
            # Unknown filetype: fall through to an empty body.
            data = ""
    except Exception as ex:
        logger.exception(ex)
        err_msg = str(ex)
        err_status = 500
        if is_json:
            return response({"error": err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    # Convert the stuff if needed
    try:
        # data comes in as a list of (name, size) tuples and is converted
        # to a more appropriate data type going out
        if res_type == "tsv":
            lines = []
            for (name, size) in data:
                lines += ["{}\t{}\n".format(name, size)]
            data = lines

        if res_type == "json" and not incl_cum:
            json_out = {}
            for row in data:
                json_out[row[0]] = {"size": int(row[1])}
            data = json_out

        if res_type == "json" and incl_cum:
            json_out = {}
            cum = 0
            for row in data:
                size = int(row[1])
                json_out[row[0]] = {"size": size, "offset": cum}
                cum += size
            data = json_out
    except Exception as e:
        logger.exception(e)
        err_msg = "THIS IS AN OUTRAGE!!!1! Something failed. 😡"
        err_status = 500
        if is_json:
            return response({"error": err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    return response(data)
def check_for_chromsizes(filename, coord_system):
    '''
    Check to see if we have chromsizes matching the coord system
    of the filename.

    Parameters
    ----------
    filename: string
        The name of the bigwig file
    coord_system: string
        The coordinate system (assembly) of this bigwig file

    Returns
    -------
    string
        The coordSystem of the single matching chromsizes tileset.

    Raises
    ------
    CommandError
        If no chromsizes overlap the file's chromosomes, if more than one
        coordSystem matches, or if the match contradicts `coord_system`.
    '''
    tileset_info = hgbi.tileset_info(filename)
    # Normalize to strings so they compare equal to the TSV-derived sets.
    tsinfo_chromsizes = set(
        (str(chrom), str(size))
        for chrom, size in tileset_info['chromsizes'])

    chrom_info_tileset = None

    # check if we have a chrom sizes tileset that matches the coordsystem
    # of the input file
    if coord_system is not None and len(coord_system) > 0:
        try:
            chrom_info_tileset = tm.Tileset.objects.filter(
                coordSystem=coord_system, datatype='chromsizes')

            if len(chrom_info_tileset) > 1:
                # Bug fix: the two message fragments previously concatenated
                # without a separating space ("chromSizesfor").
                raise CommandError(
                    "More than one available set of chromSizes "
                    "for this coordSystem ({})".format(coord_system))

            chrom_info_tileset = chrom_info_tileset.first()
        except dce.ObjectDoesNotExist:
            # NOTE(review): .filter() never raises ObjectDoesNotExist;
            # kept for safety, but .first() already yields None on no match.
            chrom_info_tileset = None

    matches = []

    if chrom_info_tileset is None:
        # we haven't found chromsizes matching the coordsystem
        # go through every chromsizes file and see if we have a match
        for chrom_info_tileset in tm.Tileset.objects.filter(
                datatype='chromsizes'):
            chromsizes_set = set(
                tuple(t)
                for t in tcs.get_tsv_chromsizes(
                    chrom_info_tileset.datafile.path))

            matches += [(len(chromsizes_set & tsinfo_chromsizes),
                         chrom_info_tileset)]
    else:
        # a set of chromsizes was provided
        chromsizes_set = set(
            tuple(t)
            for t in tcs.get_tsv_chromsizes(chrom_info_tileset.datafile.path))

        matches += [(len(chromsizes_set & tsinfo_chromsizes),
                     chrom_info_tileset)]

    # matches that overlap some chromsizes with the bigwig file
    overlap_matches = [m for m in matches if m[0] > 0]

    if len(overlap_matches) == 0:
        # Bug fix: fragments previously ran together ("bigwigSee ...").
        raise CommandError(
            "No chromsizes available which match the chromosomes in this "
            "bigwig. "
            "See http://docs.higlass.io/data_preparation.html#bigwig-files "
            "for more information")

    if len(overlap_matches) > 1:
        raise CommandError(
            "Multiple matching coordSystems: "
            "See http://docs.higlass.io/data_preparation.html#bigwig-files "
            "for more information",
            ["({} [{}])".format(t[1].coordSystem, t[0])
             for t in overlap_matches])

    if (coord_system is not None and len(coord_system) > 0
            and overlap_matches[0][1].coordSystem != coord_system):
        # Bug fix: .format() previously bound only to the last concatenated
        # literal, so the {} placeholders were never filled in; also fixed
        # the "sytem" typo in the message.
        raise CommandError(
            "Matching chromosome sizes (coordSystem: {}) do not "
            "match the specified coordinate system ({}). "
            "Either omit the coordSystem or specify a matching one. "
            "See http://docs.higlass.io/data_preparation.html#bigwig-files "
            "for more information".format(
                overlap_matches[0][1].coordSystem, coord_system))

    if (coord_system is not None and len(coord_system) > 0
            and overlap_matches[0][1].coordSystem == coord_system):
        print("Using coordinates for coordinate system: {}".format(
            coord_system))

    if coord_system is None or len(coord_system) == 0:
        print("No coordinate system specified, but we found matching "
              "chromsizes. Using coordinate system {}.".format(
                  overlap_matches[0][1].coordSystem))

    return overlap_matches[0][1].coordSystem
def tileset_info(request):
    '''
    Get information about a tileset.

    Tilesets have information critical to their display such as the maximum
    number of dimensions as well as their width. This needs to be relayed to
    the client in order for it to know which tiles to request.

    Args:
        request (django.http.HTTPRequest): The request object containing
            tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object containing the tileset
            meta-information.
    '''
    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    chromsizes_error = None

    if 'cs' in request.GET:
        # we need to call a different server to get the tiles
        # Bug fix: the original tested `'ci' in request.GET.getlist`, i.e.
        # membership on a bound method, which raises TypeError at runtime.
        if 'ci' not in request.GET:
            chromsizes_error = 'cs param present without ci'

        # call the request server and get the chromsizes
        pass
    else:
        if 'ci' in request.GET:
            try:
                chromsizes = tm.Tileset.objects.get(uuid=request.GET['ci'])
                data = tcs.chromsizes_array_to_series(
                    tcs.get_tsv_chromsizes(chromsizes.datafile.path))
            except Exception:
                # Best-effort lookup: a missing or unreadable chromsizes
                # tileset is deliberately ignored.
                pass

    for tileset_uuid in tileset_uuids:
        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        # Special-cased built-in tileset: fixed OSM image pyramid bounds.
        if tileset_uuid == 'osm-image':
            tileset_infos[tileset_uuid] = {
                'min_x': 0,
                'max_height': 134217728,
                'min_y': 0,
                'max_y': 134217728,
                'max_zoom': 19,
                'tile_size': 256
            }
            continue

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                'error': 'No such tileset with uid: {}'.format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {'error': "Forbidden"}
            continue

        if (
            tileset_object.filetype == 'hitile'
            or tileset_object.filetype == 'hibed'
        ):
            # Renamed from `tileset_info` to avoid shadowing this function.
            ts_info = hdft.get_tileset_info(
                h5py.File(tileset_object.datafile.path, 'r'))
            tileset_infos[tileset_uuid] = {
                "min_pos": [int(ts_info['min_pos'])],
                "max_pos": [int(ts_info['max_pos'])],
                # Smallest power of two covering the full data extent.
                "max_width": 2 ** math.ceil(
                    math.log(
                        ts_info['max_pos'] - ts_info['min_pos']
                    ) / math.log(2)
                ),
                "tile_size": int(ts_info['tile_size']),
                "max_zoom": int(ts_info['max_zoom'])
            }
        elif tileset_object.filetype in ('bigwig', 'bigbed'):
            # Both filetypes share an identical tileset_info pathway
            # (previously duplicated verbatim in two branches).
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(
                tileset_object.datafile.path, chromsizes
            )
            if 'chromsizes' in tsinfo:
                # Sizes may be numpy ints; make them JSON-serializable.
                tsinfo['chromsizes'] = [
                    (c, int(s)) for c, s in tsinfo['chromsizes']
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif tileset_object.filetype == 'multivec':
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path)
        elif tileset_object.filetype == "elastic_search":
            # NOTE(review): `urllib.urlopen` is Python 2 style — presumably a
            # legacy branch; confirm against this module's imports.
            response = urllib.urlopen(
                tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif tileset_object.filetype == 'beddb':
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'bed2ddb':
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'cooler':
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'time-interval-json':
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif (
            tileset_object.filetype == '2dannodb'
            or tileset_object.filetype == 'imtiles'
        ):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'geodb':
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif tileset_object.filetype == 'bam':
            tileset_infos[tileset_uuid] = ctb.tileset_info(
                tileset_object.datafile.path
            )
            tileset_infos[tileset_uuid]['max_tile_width'] = \
                hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                'error': 'Unknown filetype ' + tileset_object.filetype
            }

        # Attach metadata common to every successfully-resolved tileset.
        tileset_infos[tileset_uuid]['name'] = tileset_object.name
        tileset_infos[tileset_uuid]['datatype'] = tileset_object.datatype
        tileset_infos[tileset_uuid]['coordSystem'] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]['coordSystem2'] = \
            tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
def sizes(request):
    '''Return chromosome sizes.

    Retrieves the chromSizes.tsv and either returns it as is or converts
    it to a JSON format.

    Args:
        request: HTTP GET request object. The request can feature the
            following queries:
            id: id of the stored chromSizes [e.g.: hg19 or mm9]
            type: return data format [tsv or json]
            cum: return cumulative size or offset [0 or 1]

    Returns:
        A HTTP text or JSON response depending on the GET request.

        A text response looks like this:
        ```
        chr1    1
        chr2    2
        ...
        ```

        A JSON response looks like this:
        ```
        {
            chr1: {size: 1, offset: 0},
            chr2: {size: 2, offset: 1},
            ...
        }
        ```
    '''
    uuid = request.GET.get('id', False)
    res_type = request.GET.get('type', 'tsv')
    # Bug fix: the original used the raw query value, so `cum=0` (the string
    # "0") was truthy and returned cumulative sizes despite the documented
    # [0 or 1] contract. Absent or "0"/"false" now means no cumulation.
    incl_cum = request.GET.get('cum', '0') not in ('0', '', 'false', 'False')

    response = HttpResponse
    is_json = False

    if res_type == 'json':
        is_json = True
        response = JsonResponse

    if res_type != 'json' and incl_cum:
        return response(
            'Sorry buddy. Cumulative sizes not yet supported for non-JSON '
            'file types. 😞',
            status=501
        )

    # Try to find the db entry
    try:
        chrom_sizes = tm.Tileset.objects.get(uuid=uuid)
    except Exception as e:
        logger.exception(e)
        err_msg = 'Oh lord! ChromSizes for %s not found. 😬' % uuid
        err_status = 404
        if is_json:
            return response({'error': err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    # Try to load the chromosome sizes and return them as a list of
    # (name, size) tuples
    try:
        # Hoisted: the filetype lookup was previously repeated per branch.
        filetype = tgt.get_tileset_filetype(chrom_sizes)
        datapath = tut.get_datapath(chrom_sizes.datafile.url)
        if filetype == 'bigwig':
            data = hgbi.chromsizes(datapath)
        elif filetype == 'cooler':
            data = tcs.get_cooler_chromsizes(datapath)
        elif filetype == 'chromsizes-tsv':
            data = tcs.get_tsv_chromsizes(datapath)
        elif filetype == 'multivec':
            data = tcs.get_multivec_chromsizes(datapath)
        else:
            # Unknown filetype: fall through to an empty body.
            data = ''
    except Exception as ex:
        logger.exception(ex)
        err_msg = str(ex)
        print('err_msg:', err_msg)
        err_status = 500
        if is_json:
            return response({'error': err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    # Convert the stuff if needed
    try:
        # data comes in as a list of (name, size) tuples and is converted
        # to a more appropriate data type going out
        if res_type == 'tsv':
            lines = []
            for (name, size) in data:
                lines += ["{}\t{}\n".format(name, size)]
            data = lines

        if res_type == 'json' and not incl_cum:
            json_out = {}
            for row in data:
                json_out[row[0]] = {'size': int(row[1])}
            data = json_out

        if res_type == 'json' and incl_cum:
            json_out = {}
            cum = 0
            for row in data:
                size = int(row[1])
                json_out[row[0]] = {'size': size, 'offset': cum}
                cum += size
            data = json_out
    except Exception as e:
        logger.exception(e)
        err_msg = 'THIS IS AN OUTRAGE!!!1! Something failed. 😡'
        err_status = 500
        if is_json:
            return response({'error': err_msg}, status=err_status)
        return response(err_msg, status=err_status)

    return response(data)