def eventgenes(request):
    """Render the genes involved in events matching the request.

    Reads two optional query-string parameters from ``request.GET``:

    * ``genes`` -- comma-separated integer gene ids.  A missing or
      non-integer value is treated as "no genes given".
    * ``q`` -- keyword query, resolved to abstracts via ``get_abstracts``.

    Returns the rendered ``eventgenes.html`` template.  Raises ``Http404``
    when ``get_event_genes`` raises ``KeyError`` (neither genes nor abstracts
    were supplied) or when it returns nothing.
    """
    # gene ids from the request; anything unparsable means "no gene filter"
    try:
        gene_ids = list(map(int, request.GET['genes'].split(',')))
    except (KeyError, ValueError):
        gene_ids = None

    # abstracts matching the keyword query, if there is one
    keywords = request.GET.get('q')
    abstracts = get_abstracts(keywords) if keywords else None

    # look up the genes; a KeyError means we had nothing to search on
    try:
        found = get_event_genes(genes=gene_ids, abstracts=abstracts)
    except KeyError:
        raise Http404

    if not found:
        raise Http404

    context = {'event_genes': found, 'genes': gene_ids}
    return render_to_response("eventgenes.html", context)
def eventsummary(request):
    """Return a JSON-wrapped HTML summary of gene combinations.

    Query-string parameters read from ``request.GET``:

    * ``genes`` -- comma-separated integer gene ids; a missing or
      non-integer value is treated as "no genes given".
    * ``q`` -- keyword query, resolved to abstracts via ``get_abstracts``.
    * ``orderby`` -- ``'abstracts'`` (default) or ``'symbol'``,
      case-insensitive.  Unrecognised values fall back to the default.
    * ``limit`` -- optional maximum number of outer genes to render;
      missing, non-integer or negative values mean "no limit".

    Returns an ``HttpResponse`` whose body is a JSON object.  On success it
    is ``{'validresult': True, 'result': <rendered eventsummary.html>}``;
    otherwise ``{'validresult': False, 'errormsg': <message>}``.
    """
    response = HttpResponse()

    # get genes out of request
    try:
        genes = [int(g) for g in request.GET['genes'].split(',')]
    except (KeyError, ValueError):
        genes = None

    # get abstracts
    query = request.GET.get('q')
    abstracts = get_abstracts(query) if query else None

    # get gene combinations
    try:
        outergenes = get_gene_combinations(genes=genes, abstracts=abstracts)
    except KeyError:
        # error if no gene or abstracts
        json.dump({'validresult': False,
                   'errormsg': 'You must supply either genes or a keyword query'},
                  response)
        return response

    # get sorter function from request.  The orderby value comes straight
    # from the query string, so an unknown value must not be allowed to
    # raise KeyError here; use the default ordering instead.
    sorters = {
        'abstracts': lambda g: -g.count,
        'symbol': lambda g: g.symbol,
    }
    orderby = request.GET.get('orderby', 'abstracts').lower()
    if orderby not in sorters:
        orderby = 'abstracts'
    sorter = sorters[orderby]

    # sort data structure before rendering it (outer genes, and the inner
    # genes of each outer gene, by the same key)
    outergenes_sorted = sorted(outergenes.values(), key=sorter)
    for og in outergenes_sorted:
        og.innergenes.sort(key=sorter)

    # apply limit
    try:
        limit = int(request.GET.get('limit'))
    except (TypeError, ValueError):
        limit = None
    # a negative slice bound would drop items from the end, so ignore it
    if limit is not None and limit >= 0:
        outergenes_sorted = outergenes_sorted[:limit]

    # render and return JSON response
    if outergenes_sorted:
        html = render_to_string("eventsummary.html",
                                {'outergenes': outergenes_sorted,
                                 'genes': genes,
                                 'orderby': orderby})
        json.dump({'validresult': True, 'result': html}, response)
    else:
        json.dump({'validresult': False, 'errormsg': 'No genes found'}, response)
    return response
def _eventlist_gene_ids(params):
    """Resolve the genes named in a query dict to internal gene ids.

    Tries, in order: ``genes`` (internal ids), ``gene_entrez_ids`` (entrez
    ids looked up through ``Gene``), then ``gene_symbols`` (via
    ``gene_lookup``).  A source that is missing or fails to parse falls
    through to the next one.  Returns ``None`` when no source is usable.
    Raises ``Http404`` when entrez ids were given but matched no gene.
    """
    # internal gene ids
    try:
        return [int(g) for g in params['genes'].split(',')]
    except (KeyError, ValueError):
        pass

    # entrez ids
    try:
        entrez_ids = [int(i.strip())
                      for i in params['gene_entrez_ids'].split(',') if i != '']
        genes = [g.id for g in Gene.objects.filter(entrez_id__in=entrez_ids)]
    except (KeyError, ValueError):
        pass
    else:
        # workaround for database mess
        if not genes:
            raise Http404
        return genes

    # gene symbols
    try:
        return [g.id for g in gene_lookup(params['gene_symbols'])]
    except (KeyError, ValueError):
        return None


def eventlist(request):
    """Find and return events matching a query.

    Query-string parameters read from ``request.GET``:

    * ``genes`` / ``gene_entrez_ids`` / ``gene_symbols`` -- the genes to
      search for; the first usable one wins.
    * ``q`` -- keyword query, resolved to abstracts via ``get_abstracts``.
    * ``limit`` -- number of events to fetch, capped at 500 (also the
      default).
    * ``offset`` -- index of the first event to fetch (default 0).
    * ``preview`` -- if set, render ``eventpreview.html`` instead of the
      full list; ``detail`` then names a gene whose summary row is included.
    * ``download`` -- ``xml`` or ``csv`` (case-insensitive) to get a file
      instead of HTML.

    Returns an ``HttpResponse``.  Raises ``Http404`` when entrez ids match
    no gene, or (outside preview mode) when no events were found.
    """
    import logging

    maxlimit = 500  # maximum number of events to fetch at once

    # get genes out of request.  First check for internal gene ids, then
    # entrez ids, then gene symbols.
    genes = _eventlist_gene_ids(request.GET)

    # get abstracts
    query = request.GET.get('q')
    abstracts = get_abstracts(query) if query else None

    # get limit and offset
    try:
        limit = min(int(request.GET['limit']), maxlimit)
    except (KeyError, ValueError):
        limit = maxlimit
    try:
        offset = int(request.GET['offset'])
    except (KeyError, ValueError):
        offset = 0

    # get events.  A KeyError (no genes or abstracts supplied) just means
    # "no events"; the 404 for that case is raised further down so that the
    # preview page can still be rendered.
    try:
        events = get_events(genes=genes, abstracts=abstracts,
                            limit=limit, offset=offset)
    except KeyError:
        events = []

    # return the appropriate response
    if request.GET.get('preview'):
        if genes:
            genesyms = [g.symbol for g in
                        Gene.objects.filter(id__in=genes).only('symbol')]
        else:
            genesyms = []

        # show more information about a specific gene
        # TODO: clean this up during the next re-organization of the event search
        detail = gene_lookup(request.GET.get('detail'))
        if len(detail) > 0:
            combinations = get_gene_combinations(genes=genes, abstracts=abstracts)
            summaryrow = combinations.get(detail[0].id)
            if summaryrow:
                summaryrow.innergenes.sort(key=lambda g: -g.count)
        else:
            summaryrow = None

        return render_to_response("eventpreview.html",
                                  {'events': events, 'geneids': genes,
                                   'genesyms': genesyms, 'q': query,
                                   'summaryrow': summaryrow})

    # 404 if there were no events
    if not events:
        raise Http404

    dl = request.GET.get('download')
    if dl:
        filetype = dl.lower()

        if filetype == 'xml':
            # return xml file
            response = HttpResponse('<?xml version="1.0" ?>\n<!DOCTYPE eventlist SYSTEM "http://gadget.biostat.wisc.edu/static/eventlist.dtd">\n<eventlist>\n')
            response.write(xmldescription(query=query, genes=genes,
                                          limit=limit, offset=offset))
            for ev in events:
                try:
                    # wrap each event in a try block so if something goes
                    # wrong, we still get all the other events.  take this
                    # out when debugging.
                    response.write('\n')
                    response.write(ev.xml(indent=2))
                except Exception:
                    # Exception rather than a bare except, so SystemExit and
                    # KeyboardInterrupt still propagate; the traceback is
                    # logged instead of being lost.
                    logging.getLogger(__name__).exception(
                        "event %s failed to render XML", ev.id)
            response.write('</eventlist>')
            response['Content-Type'] = 'text/xml'
            return response

        if filetype == 'csv':
            # create, package, and return a CSV file
            response = HttpResponse(mimetype='text/csv')
            response['Content-Disposition'] = 'attachment; filename=gadget-events.csv'
            response.write('event_id,gene_entrez_ids,gene_symbols,event_types,abstract_count\n'
                           + '\n'.join([ev.tablerow() for ev in events]))
            return response

    # render HTML (also reached for an unrecognised download type)
    return render_to_response("eventlist.html", {'events': events, 'q': query})
def keyphrasesearch(request):
    """Search for keyphrases associated with a set of genes and/or keywords.

    Search parameters are parsed from the request by ``searchparams``.  The
    view builds a gene query (from the query string or an uploaded gene
    file), fetches the matching abstracts with ``get_abstracts``, and runs a
    raw SQL query that scores each keyphrase by precision, recall and F1
    over the matching abstracts (and over the query genes, when genes were
    given).

    Returns whatever ``searchresponse`` returns: an error response when a
    gene term or gene file cannot be resolved, when no query was supplied,
    or when nothing matched; otherwise a response carrying the result set
    and the number of matching abstracts.
    """
    # parse search parameters out of the query string
    params = searchparams(request)

    if params.genes or params.usegenefile:
        try:
            # Get a gene query to run against the abstract index, plus the
            # same query without homologs: the gene list used in the SQL
            # below is always built from the homolog-free query.
            if params.usegenefile:
                # get genes from an uploaded file
                genequery = genefile_lookup(params.genefileID,
                                            implicitOr=params.implicitOr,
                                            usehomologs=params.usehomologs)
                if params.usehomologs:
                    listquery = genefile_lookup(params.genefileID,
                                                implicitOr=params.implicitOr,
                                                usehomologs=False)
                else:
                    listquery = genequery
            else:
                # get genes from the query string
                genequery = parse_gene_abstractquery(params.genes, params.species,
                                                     params.implicitOr,
                                                     params.usehomologs)
                if params.usehomologs:
                    listquery = parse_gene_abstractquery(params.genes, params.species,
                                                         params.implicitOr,
                                                         usehomologs=False)
                else:
                    listquery = genequery

            # gene ID's in the query, as strings.  Built as a real list
            # because it is concatenated with other lists further down.
            genelist = [str(g) for g in flatten_query(listquery)]
        except LookupError as e:
            # a term in the gene query couldn't be matched to any genes.
            # NOTE(review): e.args[0] is interpolated into HTML unescaped --
            # confirm that it cannot carry user-controlled markup.
            return searchresponse(False, params,
                                  errmsg='No genes match <b>{0}</b> for species {1}'.format(e.args[0], params.species))
        except BadGenefileError:
            # Called with the same (validresult, params) arguments as every
            # other searchresponse call in this view.
            return searchresponse(False, params,
                                  errmsg="Can't find this gene file! It probably expired. Please upload it again.")
    else:
        genequery = None
        genelist = []

    # set limit to 5000 if we're not given a limit
    if not params.limit:
        params.limit = 5000
        params.query_limit = 5000

    # don't do anything if we don't have a query
    if not genequery and not params.keywords:
        return searchresponse(False, params,
                              errmsg="Please enter gene symbols or a keyword query.")

    # use homology option to decide which gene-abstract table to use
    if params.usehomologs:
        geneabstract_tablename = 'homologene_gene_abstract'
    else:
        geneabstract_tablename = 'gene_abstract'

    # get abstracts matching keywords and genes
    abstracts = get_abstracts(params.keywords, genequery, params.usehomologs)

    # error if no abstracts matched the query (an empty list would also
    # produce an invalid "in ()" clause and a division by zero below)
    if not abstracts:
        return searchresponse(False, params,
                              errmsg="Your query did not match any abstracts.")
    abstracts = list(abstracts)

    def paramstring(l):
        """Return a string of l comma-separated %s placeholders."""
        return ','.join(['%s'] * l)

    if genelist:
        # query if we have genes.  The order-by column is interpolated into
        # the SQL text, so only whitelisted names are accepted.
        if params.orderby in gene_query_orderbys:
            query_orderby = params.orderby
        else:
            query_orderby = 'gene_f1_score'

        sqlquery = """
            select a.*, k.`string` `string`
            from (
                select
                    ka.`keyphrase` `id`,
                    kgc.`genecount` total_genes,
                    count(distinct ga_query.`gene`) query_genes,
                    count(distinct ga_query.`gene`) / {gene_list_size} gene_recall,
                    count(distinct ga_query.`gene`) / kgc.`genecount` gene_precision,
                    2 * (count(distinct ga_query.`gene`) / kgc.`genecount`) * (count(distinct ga_query.`gene`) / {gene_list_size}) / ((count(distinct ga_query.`gene`) / kgc.`genecount`) + (count(distinct ga_query.`gene`) / {gene_list_size})) gene_f1_score,
                    ka.`abstractcount` total_abstracts,
                    count(distinct ka.`abstract`) query_abstracts,
                    count(distinct ka.`abstract`) / ka.`abstractcount` abstract_precision,
                    count(distinct ka.`abstract`) / {abstract_list_size} abstract_recall,
                    2 * (count(distinct ka.`abstract`) / ka.`abstractcount`) * (count(distinct ka.`abstract`) / {abstract_list_size}) / ((count(distinct ka.`abstract`) / ka.`abstractcount`) + (count(distinct ka.`abstract`) / {abstract_list_size})) abstract_f1_score
                from `keyphrase_abstract` ka
                inner join `keyphrase_genecounts` kgc
                    on kgc.`keyphrase` = ka.`keyphrase`
                inner join `{geneabstract_tablename}` ga_query
                    on ka.abstract = ga_query.abstract
                where ka.abstract in ({abstract_param_list})
                    and ga_query.`gene` in ({genes_param_list})
                    and kgc.`tax` = %s
                group by ka.`keyphrase`
                order by {query_orderby} desc
                limit %s, %s
            ) a
            inner join `keyphrase` k
                on k.`id` = a.`id`;
            """.format(geneabstract_tablename=geneabstract_tablename,
                       abstract_param_list=paramstring(len(abstracts)),
                       genes_param_list=paramstring(len(genelist)),
                       gene_list_size=len(genelist),
                       abstract_list_size=len(abstracts),
                       query_orderby=query_orderby)

        # parameter order follows the %s placeholders: abstracts, genes,
        # species, then the limit clause
        sqlparams = abstracts + genelist + [params.species, params.offset, params.query_limit]
    else:
        # query if we don't have genes
        if params.orderby in abstract_query_orderbys:
            query_orderby = params.orderby
        else:
            query_orderby = 'abstract_f1_score'

        sqlquery = """
            select a.*, k.`string`
            from (
                select
                    ka.`keyphrase` `id`,
                    null total_genes,
                    null query_genes,
                    null gene_recall,
                    null gene_precision,
                    null gene_f1_score,
                    ka.`abstractcount` total_abstracts,
                    count(distinct ka.`abstract`) query_abstracts,
                    count(distinct ka.`abstract`) / ka.`abstractcount` abstract_precision,
                    count(distinct ka.`abstract`) / {abstract_list_size} abstract_recall,
                    2 * (count(distinct ka.`abstract`) / ka.`abstractcount`) * (count(distinct ka.`abstract`) / {abstract_list_size}) / ((count(distinct ka.`abstract`) / ka.`abstractcount`) + (count(distinct ka.`abstract`) / {abstract_list_size})) abstract_f1_score
                from `keyphrase_abstract` ka
                where ka.`abstract` in ({abstract_param_list})
                group by ka.`keyphrase`
                order by {query_orderby} desc
                limit %s, %s
            ) a
            inner join `keyphrase` k
                on k.`id` = a.`id`
            """.format(abstract_param_list=paramstring(len(abstracts)),
                       abstract_list_size=len(abstracts),
                       query_orderby=query_orderby)

        sqlparams = abstracts + [params.offset, params.query_limit]

    result = KeyPhrase.objects.raw(sqlquery, sqlparams)

    # Check to see if the resultset is empty.  Django's RawQuerySet object
    # doesn't have an empty() or __len__(), and is always True.
    # So, try getting the first item instead.
    try:
        result[0]
    except IndexError:
        return searchresponse(False, params,
                              errmsg="Your query didn't match any keywords!")

    return searchresponse(True, params, result=result, abstractcount=len(abstracts))