def convert_to_mouse_gene_id(self, species=None, gene_id=None): """If the species is rat or human, translate the gene_id to the mouse geneid If there is no input gene_id or there's no corresponding mouse gene_id, return None """ if not gene_id: return None mouse_gene_id = None if species == 'mouse': mouse_gene_id = gene_id elif species == 'rat': query = """SELECT mouse FROM GeneIDXRef WHERE rat='%s'""" % escape(gene_id) result = g.db.execute(query).fetchone() if result != None: mouse_gene_id = result.mouse elif species == 'human': query = """SELECT mouse FROM GeneIDXRef WHERE human='%s'""" % escape(gene_id) result = g.db.execute(query).fetchone() if result != None: mouse_gene_id = result.mouse return mouse_gene_id
def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): input_trait_mouse_gene_id = convert_to_mouse_gene_id( target_dataset.group.species.lower(), this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(trait_geneid_dict.items()): mouse_gene_id = convert_to_mouse_gene_id( target_dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: result = g.db.execute( """SELECT value FROM LCorrRamin3 WHERE GeneId1='%s' and GeneId2='%s' """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) ).fetchone() if not result: result = g.db.execute("""SELECT value FROM LCorrRamin3 WHERE GeneId2='%s' and GeneId1='%s' """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) ).fetchone() if result: lit_corr = result.value lit_corr_data[trait] = [gene_id, lit_corr] else: lit_corr_data[trait] = [gene_id, 0] else: lit_corr_data[trait] = [gene_id, 0] return lit_corr_data
def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) if self.search_term[0] == "*": query = (self.base_query + """%s WHERE PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % (from_clause, escape(str(self.dataset.group.id)), escape(str(self.dataset.id)))) else: query = ( self.base_query + """%s WHERE %s and PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % (from_clause, where_clause, escape(str( self.dataset.group.id)), escape(str(self.dataset.id)))) return query
def do_lit_correlation_for_trait_list(self): input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) for trait in self.correlation_results: if trait.geneid: trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid) else: trait.mouse_gene_id = None if trait.mouse_gene_id and str(trait.mouse_gene_id).find(";") == -1: result = g.db.execute( """SELECT value FROM LCorrRamin3 WHERE GeneId1='%s' and GeneId2='%s' """ % (escape(str(trait.mouse_gene_id)), escape(str(input_trait_mouse_gene_id))) ).fetchone() if not result: result = g.db.execute("""SELECT value FROM LCorrRamin3 WHERE GeneId2='%s' and GeneId1='%s' """ % (escape(str(trait.mouse_gene_id)), escape(str(input_trait_mouse_gene_id))) ).fetchone() if result: lit_corr = result.value trait.lit_corr = lit_corr else: trait.lit_corr = 0 else: trait.lit_corr = 0
def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) query = ( self.base_query + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), where_clause, escape(str(self.dataset.id)))) return query
def get_alias_where_clause(self): search_string = escape(self.search_term[0]) if self.search_term[0] != "*": match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % ( search_string) else: match_clause = "" where_clause = (match_clause + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) return where_clause
def run_combined(self, from_clause, where_clause): """Generates and runs a combined search of an phenotype dataset""" logger.debug("Running PhenotypeSearch") from_clause = self.normalize_spaces(from_clause) query = (self.base_query + """%s WHERE %s PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s""" % (from_clause, where_clause, escape(str( self.dataset.group.id)), escape(str(self.dataset.id)))) return self.execute(query)
def retrieve_other_names(self): """This method fetches the the dataset names in search_result. If the data set name parameter is not found in the 'Name' field of the data set table, check if it is actually the FullName or ShortName instead. This is not meant to retrieve the data set info if no name at all is passed. """ try: if self.type == "ProbeSet": query_args = tuple( escape(x) for x in (self.name, self.name, self.name)) self.id, self.name, self.fullname, self.shortname, self.data_scale, self.tissue = fetch1( """ SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, Tissue.Name FROM ProbeSetFreeze, ProbeFreeze, Tissue WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND ProbeFreeze.TissueId = Tissue.Id AND (ProbeSetFreeze.Name = '%s' OR ProbeSetFreeze.FullName = '%s' OR ProbeSetFreeze.ShortName = '%s') """ % (query_args), "/dataset/" + self.name + ".json", lambda r: (r["id"], r["name"], r["full_name"], r[ "short_name"], r["data_scale"], r["tissue"])) else: query_args = tuple( escape(x) for x in ((self.type + "Freeze"), self.name, self.name, self.name)) self.tissue = "N/A" self.id, self.name, self.fullname, self.shortname = fetchone( """ SELECT Id, Name, FullName, ShortName FROM %s WHERE (Name = '%s' OR FullName = '%s' OR ShortName = '%s') """ % (query_args)) except TypeError: logger.debug( "Dataset {} is not yet available in GeneNetwork.".format( self.name)) pass
def run_combined(self, from_clause='', where_clause=''): """Generates and runs a combined search of an mRNA expression dataset""" logger.debug("Running ProbeSetSearch") #query = self.base_query + from_clause + " WHERE " + where_clause from_clause = self.normalize_spaces(from_clause) query = ( self.base_query + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), where_clause, escape(str(self.dataset.id)))) return self.execute(query)
def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) if self.search_term[0] == "*": query = (self.base_query + """WHERE Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id and GenoFreeze.Id = %s""" % (escape(str(self.dataset.id)))) else: query = (self.base_query + """WHERE %s and Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id and GenoFreeze.Id = %s""" % (where_clause, escape(str(self.dataset.id)))) return query
def retrieve_genes(self, column_name): query = """ select ProbeSet.Name, ProbeSet.%s from ProbeSet,ProbeSetXRef where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSetXRef.ProbeSetId=ProbeSet.Id; """ % (column_name, escape(str(self.id))) logger.sql(query) results = g.db.execute(query).fetchall() return dict(results)
def get_where_clause(self): field = 'GOterm.acc' go_id = 'GO:' + ('0000000' + self.search_term[0])[-7:] statements = ("""%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and GOterm.id=GOassociation.term_id""" % (escape(self.dataset.type))) where_clause = " %s = '%s' and %s " % (field, go_id, statements) return where_clause
def get_where_clause(self): search_string = escape(self.search_term[0]) if self.search_term[0] != "*": match_clause = """((MATCH (ProbeSet.Name, ProbeSet.description, ProbeSet.symbol, alias, GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE))) AND """ % (search_string) else: match_clause = "" where_clause = (match_clause + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) return where_clause
def do_lit_correlation_for_all_traits(self): input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(self.trait_geneid_dict.items()): mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: #print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id) result = g.db.execute( """SELECT value FROM LCorrRamin3 WHERE GeneId1='%s' and GeneId2='%s' """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) ).fetchone() if not result: result = g.db.execute("""SELECT value FROM LCorrRamin3 WHERE GeneId2='%s' and GeneId1='%s' """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) ).fetchone() if result: #print("result:", result) lit_corr = result.value lit_corr_data[trait] = [gene_id, lit_corr] else: lit_corr_data[trait] = [gene_id, 0] else: lit_corr_data[trait] = [gene_id, 0] lit_corr_data = collections.OrderedDict(sorted(list(lit_corr_data.items()), key=lambda t: -abs(t[1][1]))) return lit_corr_data
def retrieve_sample_data(self, trait): query = """ SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2 FROM (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) left join ProbeSetSE on (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) left join NStrain on (NStrain.DataId = ProbeSetData.Id AND NStrain.StrainId = ProbeSetData.StrainId) WHERE ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND ProbeSetFreeze.Name = '%s' AND ProbeSetXRef.DataId = ProbeSetData.Id AND ProbeSetData.StrainId = Strain.Id Order BY Strain.Name """ % (escape(trait), escape(self.name)) logger.sql(query) results = g.db.execute(query).fetchall() #logger.debug("RETRIEVED RESULTS HERE:", results) return results
def setup(self): #logger.debug("IS A PHENOTYPEDATASET") # Fields in the database table self.search_fields = [ 'Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', 'Phenotype.Pre_publication_abbreviation', 'Phenotype.Post_publication_abbreviation', 'PublishXRef.mean', 'Phenotype.Lab_code', 'Publication.PubMed_ID', 'Publication.Abstract', 'Publication.Title', 'Publication.Authors', 'PublishXRef.Id' ] # Figure out what display_fields is self.display_fields = [ 'name', 'group_code', 'pubmed_id', 'pre_publication_description', 'post_publication_description', 'original_description', 'pre_publication_abbreviation', 'post_publication_abbreviation', 'mean', 'lab_code', 'submitter', 'owner', 'authorized_users', 'authors', 'title', 'abstract', 'journal', 'volume', 'pages', 'month', 'year', 'sequence', 'units', 'comments' ] # Fields displayed in the search results table header self.header_fields = [ 'Index', 'Record', 'Description', 'Authors', 'Year', 'Max LRS', 'Max LRS Location', 'Additive Effect' ] self.type = 'Publish' self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM InbredSet, PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id AND PublishFreeze.Name = "%s" ''' % escape(self.name)
def setup(self): # Fields in the database table self.search_fields = ['Name', 'Chr'] # Find out what display_fields is self.display_fields = ['name', 'chr', 'mb', 'source2', 'sequence'] # Fields displayed in the search results table header self.header_fields = ['Index', 'ID', 'Location'] # Todo: Obsolete or rename this field self.type = 'Geno' self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM InbredSet, GenoFreeze WHERE GenoFreeze.InbredSetId = InbredSet.Id AND GenoFreeze.Name = "%s" ''' % escape(self.name)
def setup(self): # Fields in the database table self.search_fields = [ 'Name', 'Description', 'Probe_Target_Description', 'Symbol', 'Alias', 'GenbankId', 'UniGeneId', 'RefSeq_TranscriptId' ] # Find out what display_fields is self.display_fields = [ 'name', 'symbol', 'description', 'probe_target_description', 'chr', 'mb', 'alias', 'geneid', 'genbankid', 'unigeneid', 'omim', 'refseq_transcriptid', 'blatseq', 'targetseq', 'chipid', 'comments', 'strand_probe', 'strand_gene', 'proteinid', 'uniprotid', 'probe_set_target_region', 'probe_set_specificity', 'probe_set_blat_score', 'probe_set_blat_mb_start', 'probe_set_blat_mb_end', 'probe_set_strand', 'probe_set_note_by_rw', 'flag' ] # Fields displayed in the search results table header self.header_fields = [ 'Index', 'Record', 'Symbol', 'Description', 'Location', 'Mean', 'Max LRS', 'Max LRS Location', 'Additive Effect' ] # Todo: Obsolete or rename this field self.type = 'ProbeSet' self.query_for_group = ''' SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType FROM InbredSet, ProbeSetFreeze, ProbeFreeze WHERE ProbeFreeze.InbredSetId = InbredSet.Id AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND ProbeSetFreeze.Name = "%s" ''' % escape(self.name)
def get_where_clause(self): if self.search_operator == "=": assert isinstance(self.search_term, (list, tuple)) self.range_min, self.range_max = self.search_term[:2] where_clause = """ (SELECT Pow(2, max(value) -min(value)) FROM ProbeSetData WHERE ProbeSetData.Id = ProbeSetXRef.dataId) > %s AND (SELECT Pow(2, max(value) -min(value)) FROM ProbeSetData WHERE ProbeSetData.Id = ProbeSetXRef.dataId) < %s """ % self.mescape( min(self.range_min, self.range_max), max(self.range_min, self.range_max)) else: # Deal with >, <, >=, and <= where_clause = """ (SELECT Pow(2, max(value) -min(value)) FROM ProbeSetData WHERE ProbeSetData.Id = ProbeSetXRef.dataId) > %s """ % (escape(self.search_term[0])) logger.debug("where_clause is:", pf(where_clause)) return where_clause
def get_where_clause(self, cis_trans): self.mb_buffer = 5 # default chromosome = None if cis_trans == "cis": the_operator = "<" else: the_operator = ">" if self.search_operator == "=": if len(self.search_term) == 2 or len(self.search_term) == 3: self.search_term = [float(value) for value in self.search_term] if len(self.search_term) == 2: lrs_min, lrs_max = self.search_term #[int(value) for value in self.search_term] elif len(self.search_term) == 3: lrs_min, lrs_max, self.mb_buffer = self.search_term elif len(self.search_term) == 4: lrs_min, lrs_max, self.mb_buffer = [ float(value) for value in self.search_term[:3] ] chromosome = self.search_term[3] if "Chr" in chromosome or "chr" in chromosome: chromosome = int(chromosome[3:]) else: SomeError if self.search_type == "CISLOD" or self.search_type == "TRANSLOD": lrs_min = lrs_min * 4.61 lrs_max = lrs_max * 4.61 sub_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s and """ % ( escape(self.dataset.type), escape(str(min(lrs_min, lrs_max))), escape(self.dataset.type), escape(str(max(lrs_min, lrs_max)))) else: # Deal with >, <, >=, and <= sub_clause = """ %sXRef.LRS %s %s and """ % (escape( self.dataset.type), escape( self.search_operator), escape(self.search_term[0])) if cis_trans == "cis": where_clause = sub_clause + """ ABS(%s.Mb-Geno.Mb) %s %s and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr""" % ( escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(str(self.species_id)), escape(self.dataset.type)) else: if chromosome: location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % ( escape(self.dataset.type), chromosome, escape(self.dataset.type), escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), chromosome) else: location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % ( escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape( self.dataset.type), escape(self.dataset.type)) where_clause = sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and (%s)""" % (escape(self.dataset.type), escape(str(self.species_id)), location_clause) return where_clause
def mescape(self, *items): """Multiple escape""" escaped = [escape(str(item)) for item in items] logger.debug("escaped is:", escaped) return tuple(escaped)
def get_trait_data(self, sample_list=None): if sample_list: self.samplelist = sample_list else: self.samplelist = self.group.samplelist if self.group.parlist != None and self.group.f1list != None: if (self.group.parlist + self.group.f1list) in self.samplelist: self.samplelist += self.group.parlist + self.group.f1list query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species WHERE Strain.Name IN {} and Strain.SpeciesId=Species.Id and Species.name = '{}' """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) logger.sql(query) results = dict(g.db.execute(query).fetchall()) sample_ids = [results[item] for item in self.samplelist] # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks # Postgres doesn't have that limit, so we can get rid of this after we transition chunk_size = 50 number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) trait_sample_data = [] for sample_ids_step in chunks.divide_into_chunks( sample_ids, number_chunks): if self.type == "Publish": dataset_type = "Phenotype" else: dataset_type = self.type temp = ['T%s.value' % item for item in sample_ids_step] if self.type == "Publish": query = "SELECT {}XRef.Id,".format(escape(self.type)) else: query = "SELECT {}.Name,".format(escape(dataset_type)) data_start_pos = 1 query += ', '.join(temp) query += ' FROM ({}, {}XRef, {}Freeze) '.format( *mescape(dataset_type, self.type, self.type)) for item in sample_ids_step: query += """ left join {}Data as T{} on T{}.Id = {}XRef.DataId and T{}.StrainId={}\n """.format( *mescape(self.type, item, item, self.type, item, item)) if self.type == "Publish": query += """ WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId and {}Freeze.Name = '{}' and {}.Id = {}XRef.{}Id order by {}.Id """.format(*mescape(self.type, self.type, self.type, self.name, dataset_type, self.type, dataset_type, dataset_type)) else: query += """ WHERE {}XRef.{}FreezeId = {}Freeze.Id and {}Freeze.Name = '{}' and {}.Id = {}XRef.{}Id order by {}.Id """.format(*mescape( self.type, self.type, self.type, self.type, self.name, dataset_type, self.type, self.type, dataset_type)) #logger.debug("trait data query: ", query) logger.sql(query) results = g.db.execute(query).fetchall() #logger.debug("query results:", results) trait_sample_data.append(results) trait_count = len(trait_sample_data[0]) self.trait_data = collections.defaultdict(list) # put all of the separate data together into a dictionary where the keys are # trait names and values are lists of sample values for trait_counter in range(trait_count): trait_name = trait_sample_data[0][trait_counter][0] for chunk_counter in range(int(number_chunks)): self.trait_data[trait_name] += ( trait_sample_data[chunk_counter][trait_counter] [data_start_pos:])
def retrieve_trait_info(trait, dataset, get_qtl_info=False): assert dataset, "Dataset doesn't exist" resource_id = get_resource_id(dataset, trait.name) if dataset.type == 'Publish': the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format( resource_id, g.user_session.user_id) else: the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format( resource_id, g.user_session.user_id, trait.name) try: response = requests.get(the_url).content trait_info = json.loads(response) except: # ZS: I'm assuming the trait is viewable if the try fails for some reason; it should never reach this point unless the user has privileges, since that's dealt with in create_trait if dataset.type == 'Publish': query = """ SELECT PublishXRef.Id, InbredSet.InbredSetCode, Publication.PubMed_ID, CAST(Phenotype.Pre_publication_description AS BINARY), CAST(Phenotype.Post_publication_description AS BINARY), CAST(Phenotype.Original_description AS BINARY), CAST(Phenotype.Pre_publication_abbreviation AS BINARY), CAST(Phenotype.Post_publication_abbreviation AS BINARY), PublishXRef.mean, Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), CAST(Publication.Title AS BINARY), CAST(Publication.Abstract AS BINARY), CAST(Publication.Journal AS BINARY), Publication.Volume, Publication.Pages, Publication.Month, Publication.Year, PublishXRef.Sequence, Phenotype.Units, PublishXRef.comments FROM PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet WHERE PublishXRef.Id = %s AND Phenotype.Id = PublishXRef.PhenotypeId AND Publication.Id = PublishXRef.PublicationId AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishXRef.InbredSetId = InbredSet.Id AND PublishFreeze.Id = %s """ % (trait.name, dataset.id) logger.sql(query) trait_info = g.db.execute(query).fetchone() # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name # XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. elif dataset.type == 'ProbeSet': display_fields_string = ', ProbeSet.'.join(dataset.display_fields) display_fields_string = 'ProbeSet.' + display_fields_string query = """ SELECT %s FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND ProbeSetFreeze.Name = '%s' AND ProbeSet.Name = '%s' """ % (escape(display_fields_string), escape( dataset.name), escape(str(trait.name))) logger.sql(query) trait_info = g.db.execute(query).fetchone() # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif dataset.type == 'Geno': display_fields_string = ',Geno.'.join(dataset.display_fields) display_fields_string = 'Geno.' + display_fields_string query = """ SELECT %s FROM Geno, GenoFreeze, GenoXRef WHERE GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND GenoFreeze.Name = '%s' AND Geno.Name = '%s' """ % (escape(display_fields_string), escape( dataset.name), escape(trait.name)) logger.sql(query) trait_info = g.db.execute(query).fetchone() else: # Temp type query = """SELECT %s FROM %s WHERE Name = %s""" logger.sql(query) trait_info = g.db.execute(query, ','.join(dataset.display_fields), dataset.type, trait.name).fetchone() if trait_info: trait.haveinfo = True for i, field in enumerate(dataset.display_fields): holder = trait_info[i] if isinstance(holder, bytes): holder = holder.decode("utf-8", errors="ignore") setattr(trait, field, holder) if dataset.type == 'Publish': if trait.group_code: trait.display_name = trait.group_code + "_" + str(trait.name) trait.confidential = 0 if trait.pre_publication_description and not trait.pubmed_id: trait.confidential = 1 description = trait.post_publication_description # If the dataset is confidential and the user has access to confidential # phenotype traits, then display the pre-publication description instead # of the post-publication description if not trait.pubmed_id: trait.abbreviation = trait.pre_publication_abbreviation trait.description_display = trait.pre_publication_description else: trait.abbreviation = trait.post_publication_abbreviation if description: trait.description_display = description.strip() else: trait.description_display = "" if not trait.year.isdigit(): trait.pubmed_text = "N/A" else: trait.pubmed_text = trait.year if trait.pubmed_id: trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id if dataset.type == 'ProbeSet' and dataset.group: description_string = trait.description target_string = trait.probe_target_description if str(description_string or "") != "" and description_string != 'None': description_display = description_string else: description_display = trait.symbol if (str(description_display or "") != "" and description_display != 'N/A' and str(target_string or "") != "" and target_string != 'None'): description_display = description_display + '; ' + target_string.strip( ) # Save it for the jinja2 template trait.description_display = description_display trait.location_repr = 'N/A' if trait.chr and trait.mb: trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) elif dataset.type == "Geno": trait.location_repr = 'N/A' if trait.chr and trait.mb: trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb)) if get_qtl_info: # LRS and its location trait.LRS_score_repr = "N/A" trait.LRS_location_repr = "N/A" trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = "" if dataset.type == 'ProbeSet' and not trait.cellid: trait.mean = "" query = """ SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive FROM ProbeSetXRef, ProbeSet WHERE ProbeSetXRef.ProbeSetId = ProbeSet.Id AND ProbeSet.Name = "{}" AND ProbeSetXRef.ProbeSetFreezeId ={} """.format(trait.name, dataset.id) logger.sql(query) trait_qtl = g.db.execute(query).fetchone() if trait_qtl: trait.locus, trait.lrs, trait.pvalue, trait.mean, trait.additive = trait_qtl if trait.locus: query = """ select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '{}' and Geno.Name = '{}' and Geno.SpeciesId = Species.Id """.format(dataset.group.species, trait.locus) logger.sql(query) result = g.db.execute(query).fetchone() if result: trait.locus_chr = result[0] trait.locus_mb = result[1] else: trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" else: trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" if dataset.type == 'Publish': query = """ SELECT PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive FROM PublishXRef, PublishFreeze WHERE PublishXRef.Id = %s AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishFreeze.Id =%s """ % (trait.name, dataset.id) logger.sql(query) trait_qtl = g.db.execute(query).fetchone() if trait_qtl: trait.locus, trait.lrs, trait.additive = trait_qtl if trait.locus: query = """ select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '{}' and Geno.Name = '{}' and Geno.SpeciesId = Species.Id """.format(dataset.group.species, trait.locus) logger.sql(query) result = g.db.execute(query).fetchone() if result: trait.locus_chr = result[0] trait.locus_mb = result[1] else: trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" else: trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" else: trait.locus = trait.lrs = trait.additive = "" if (dataset.type == 'Publish' or dataset.type == "ProbeSet" ) and str(trait.locus_chr or "") != "" and str(trait.locus_mb or "") != "": trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( trait.locus_chr, float(trait.locus_mb)) if str(trait.lrs or "") != "": trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs else: raise KeyError( repr(trait.name) + ' information is not found in the database.') return trait
def get_trait_info(self, trait_list=None, species=''): # Note: setting trait_list to [] is probably not a great idea. if not trait_list: trait_list = [] for this_trait in trait_list: if not this_trait.haveinfo: this_trait.retrieveInfo(QTL=1) if not this_trait.symbol: this_trait.symbol = "N/A" # XZ, 12/08/2008: description # XZ, 06/05/2009: Rob asked to add probe target description description_string = str( str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') target_string = str( str(this_trait.probe_target_description).strip( codecs.BOM_UTF8), 'utf-8') if len(description_string) > 1 and description_string != 'None': description_display = description_string else: description_display = this_trait.symbol if (len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None'): description_display = description_display + '; ' + target_string.strip( ) # Save it for the jinja2 template this_trait.description_display = description_display if this_trait.chr and this_trait.mb: this_trait.location_repr = 'Chr%s: %.6f' % ( this_trait.chr, float(this_trait.mb)) # Get mean expression value query = ( """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet where ProbeSetXRef.ProbeSetFreezeId = %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSet.Name = '%s' """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) #logger.debug("query is:", pf(query)) logger.sql(query) result = g.db.execute(query).fetchone() mean = result[0] if result else 0 if mean: this_trait.mean = "%2.3f" % mean # LRS and its location this_trait.LRS_score_repr = 'N/A' this_trait.LRS_location_repr = 'N/A' # Max LRS and its Locus location if this_trait.lrs and this_trait.locus: query = """ select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = '{}' and Geno.Name = '{}' and Geno.SpeciesId = Species.Id """.format(species, this_trait.locus) logger.sql(query) result = g.db.execute(query).fetchone() if result: lrs_chr, lrs_mb = result this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_location_repr = 'Chr%s: %.6f' % ( lrs_chr, float(lrs_mb)) return trait_list