class MutationQuery(object):
    """Find one mutation record of a gene from a comma-separated query string.

    The query string is split on ``,`` into ``key=value`` pairs. The keys
    read by this class are ``gene``, ``location``, ``chr``, ``position``,
    ``start`` and ``end``.
    """

    _DIGITS = "0123456789"

    def __init__(self, url_information):
        self.url_information = url_information
        self.db = DBBase('Genes')

    def resolve_url(self):
        """Parse ``self.url_information`` into a ``{key: value}`` dict of strings.

        An item that does not contain exactly one ``=`` is split at its first
        digit instead: the text before the digit is the key, the rest minus
        its final character is the value.
        """
        url_information_dict = {}
        for item in self.url_information.split(','):
            try:
                item_key, item_value = item.split('=')
            except ValueError:
                # Unpacking fails when the item has no '=' or more than one.
                num = next(
                    (index for index, char in enumerate(item)
                     if char in self._DIGITS),
                    0,
                )
                item_key = item[0:num]
                # NOTE(review): the slice drops the last character of the
                # item; presumably a trailing delimiter - confirm with callers.
                item_value = item[num:-1]
            url_information_dict[item_key] = item_value
        return url_information_dict

    def is_type(self, a):
        """Return ``"ID"`` when every character of *a* is an ASCII digit.

        Anything else, including an empty or missing value, is ``"symbol"``.
        """
        if not a:
            return "symbol"
        if all(char in self._DIGITS for char in a):
            return "ID"
        return "symbol"

    def get_mutation(self):
        """Return the first stored mutation matching the query, or ``None``.

        The gene is looked up by ``ENTREZ_ID`` when the ``gene`` value is all
        digits, otherwise by ``Symbol``. With ``location == 'cnv'`` the
        record's ``CNV`` list is searched on ``start``/``end``; otherwise the
        ``DNM`` list is searched on ``chr``/``Position``.
        """
        mutation_dict = self.resolve_url()
        gene = mutation_dict.get('gene')
        id_or_symbol = "ENTREZ_ID" if self.is_type(gene) == 'ID' else "Symbol"
        dnm_orcnv = 'CNV' if mutation_dict.get('location') == 'cnv' else 'DNM'

        gene_record = self.db.find_one_by_one_condition(id_or_symbol, gene)
        if gene_record is None:
            return None

        # (field in the stored record, key in the parsed query)
        if dnm_orcnv == 'DNM':
            field_pairs = (('chr', 'chr'), ('Position', 'position'))
        else:
            field_pairs = (('start', 'start'), ('end', 'end'))

        # NOTE(review): query values are strings, so stored values must be
        # strings as well for the equality test to match - confirm.
        for item in gene_record.get(dnm_orcnv) or []:
            if all(item.get(stored) == mutation_dict.get(asked)
                   for stored, asked in field_pairs):
                return item
        return None
class GetIsoformAndEnst(object):
    """Read the Ensembl cross-references stored with a gene's UniProt record."""

    def __init__(self, entrez_id):
        self.entrez_id = entrez_id
        self.db = DBBase("Uniprot_Information")

    @staticmethod
    def _protein_and_gene_ids(properties):
        """Return ``(ensp_id, ensg_id)`` from a cross-reference's properties.

        Either value is ``None`` when no matching property exists; when a
        type occurs more than once the last occurrence wins.
        """
        ensp_id = ensg_id = None
        if properties is None:
            return ensp_id, ensg_id
        if isinstance(properties, dict):
            # Tolerate a single property stored as a bare dict.
            properties = [properties]
        for prop in properties:
            prop_type = prop.get("@type")
            if prop_type == 'protein sequence ID':
                ensp_id = prop.get("@value")
            elif prop_type == 'gene ID':
                ensg_id = prop.get("@value")
        return ensp_id, ensg_id

    def get_isoform_and_enst(self):
        """Return ``[enst_id, ensp_id, ensg_id, isoform, note]`` per cross-reference.

        Returns an empty list when the gene has no record or no
        ``Ensmbl_information``. Fields a cross-reference does not provide are
        ``None``; they never carry over from the previous row.
        """
        query_id_result = self.db.find_one_by_one_condition(
            "Entrez_ID", self.entrez_id)
        if query_id_result is None:
            return []

        isoform_and_enst_list = []
        for item in query_id_result.get("Ensmbl_information") or []:
            # Reset per row so missing fields cannot leak between rows.
            enst_id = ensp_id = ensg_id = isoform = None
            if item.get("uniprot_unique_flag") == True:  # noqa: E712 - keep 1/True equivalence
                note = "No assemble annotation found in GRCH37."
            else:
                note = "-"
                molecule = item.get("molecule")
                # NOTE(review): the flattened source is ambiguous about which
                # `if` the fallback branch belongs to; it is read here as
                # "no molecule element -> use the record's UniprotKB_AC".
                if molecule is not None:
                    isoform = molecule.get("@id")
                    enst_id = item.get("@id")
                    ensp_id, ensg_id = self._protein_and_gene_ids(
                        item.get("property"))
                elif item.get("@id") is not None:
                    enst_id = item.get("@id")
                    isoform = query_id_result.get("UniprotKB_AC")
                    ensp_id, ensg_id = self._protein_and_gene_ids(
                        item.get("property"))
            isoform_and_enst_list.append(
                [enst_id, ensp_id, ensg_id, isoform, note])
        return isoform_and_enst_list

    def get_isoform(self, enst_id):
        """Return the isoform of the row whose transcript id equals *enst_id*.

        Everything from the first ``.`` of *enst_id* onwards is ignored.
        Returns ``None`` when no row matches.
        """
        wanted = enst_id.split(".")[0]
        for row in self.get_isoform_and_enst():
            if row[0] == wanted:
                return row[3]
        return None
def __init__(self, ID, isoform="all"):
    """Keep the identifier and isoform selection and open the collection.

    :param ID: identifier stored on ``self.ID``.
    :param isoform: stored on ``self.isoform_list``; defaults to ``"all"``.
    """
    self.ID = ID
    collection = DBBase('ProteinExpress_Update')
    self.db = collection
    self.isoform_list = isoform
def __init__(self, entrez_id):
    """Keep the identifier and open the ``Uniprot_Information`` collection.

    :param entrez_id: identifier stored on ``self.entrez_id``.
    """
    self.entrez_id = entrez_id
    collection = DBBase("Uniprot_Information")
    self.db = collection
def __init__(self, ID, transcript, brain_express=0):
    """Keep the query settings and open the expression collection.

    :param ID: identifier stored on ``self.ID``.
    :param transcript: stored on ``self.transcript_list``.
    :param brain_express: stored on ``self.brain_express``; defaults to 0.
    """
    self.ID = ID
    collection = DBBase('Gtex_Trans_AllTissue_Pledged')
    self.db = collection
    self.transcript_list, self.brain_express = transcript, brain_express
class GtexTranscriptExpressPlot(object):
    """Build a per-tissue line plot of transcript expression for one gene.

    Stored values are transformed with ``log2(x + 1)`` and rounded to two
    decimals before plotting.
    """

    NO_DATA_HTML = ('<div>There is no corresponding data published yet, '
                    'we will update it when such data available. </div>')

    # Column order matters: data_handling() locates the brain tissues by
    # position in this sequence.
    _TISSUES = (
        'Adipose - Subcutaneous', 'Adipose - Visceral (Omentum)',
        'Adrenal Gland', 'Artery - Aorta', 'Artery - Coronary',
        'Artery - Tibial', 'Bladder', 'Brain - Amygdala',
        'Brain - Anterior cingulate cortex (BA24)',
        'Brain - Caudate (basal ganglia)', 'Brain - Cerebellar Hemisphere',
        'Brain - Cerebellum', 'Brain - Cortex',
        'Brain - Frontal Cortex (BA9)', 'Brain - Hippocampus',
        'Brain - Hypothalamus', 'Brain - Nucleus accumbens (basal ganglia)',
        'Brain - Putamen (basal ganglia)',
        'Brain - Spinal cord (cervical c-1)', 'Brain - Substantia nigra',
        'Breast - Mammary Tissue', 'Cells - EBV-transformed lymphocytes',
        'Cells - Transformed fibroblasts', 'Cervix - Ectocervix',
        'Cervix - Endocervix', 'Colon - Sigmoid', 'Colon - Transverse',
        'Esophagus - Gastroesophageal Junction', 'Esophagus - Mucosa',
        'Esophagus - Muscularis', 'Fallopian Tube',
        'Heart - Atrial Appendage', 'Heart - Left Ventricle',
        'Kidney - Cortex', 'Liver', 'Lung', 'Minor Salivary Gland',
        'Muscle - Skeletal', 'Nerve - Tibial', 'Ovary', 'Pancreas',
        'Pituitary', 'Prostate', 'Skin - Not Sun Exposed (Suprapubic)',
        'Skin - Sun Exposed (Lower leg)', 'Small Intestine - Terminal Ileum',
        'Spleen', 'Stomach', 'Testis', 'Thyroid', 'Uterus', 'V****a',
        'Whole Blood',
    )

    def __init__(self, ID, transcript, brain_express=0):
        self.ID = ID
        self.db = DBBase('Gtex_Trans_AllTissue_Pledged')
        self.transcript_list = transcript
        self.brain_express = brain_express

    def get_express_value_from_db(self):
        """Return the stored rows for ``self.ID``.

        Only ``transcript_list == "all"`` is implemented; any other value
        yields ``None``.
        """
        if self.transcript_list == "all":
            return self.db.find_count_by_one_condition("Entrez_id", self.ID)
        return None

    def get_columns(self):
        """Return a fresh list of all tissue column names, in plot order."""
        return list(self._TISSUES)

    def get_brain_columns(self):
        """Return the tissue column names that start with ``'Brain - '``."""
        return [name for name in self._TISSUES if name.startswith('Brain - ')]

    def l2(self, x):
        """Return ``log2(x + 1)`` of *x* coerced to float; 0 maps to 0."""
        x = float(x)
        if x != 0:
            return math.log2(x + 1)
        return 0

    def r2(self, x):
        """Round *x* to two decimal places."""
        return round(x, 2)

    def average(self, num_list):
        """Return the arithmetic mean of *num_list* after float conversion.

        :raises ZeroDivisionError: if *num_list* is empty.
        """
        values = [float(item) for item in num_list]
        return sum(values) / len(values)

    def _transformed_row(self, record):
        """Return ``[transcript_id, v1, v2, ...]`` with one value per column.

        Missing tissue values stay ``None``.
        """
        row = [record.get("transcript_id")]
        for column in self.get_columns():
            value = record.get(column)
            row.append(None if value is None else self.r2(self.l2(value)))
        return row

    def data_handling(self):
        """Return the transformed rows to plot.

        With ``brain_express == 0`` every row is returned. Otherwise only
        rows with at least one brain-tissue value ``>= 1`` are kept. Returns
        ``[]`` when nothing was fetched.
        """
        express_list = self.get_express_value_from_db() or []
        rows = [self._transformed_row(record) for record in express_list]
        if self.brain_express == 0:
            return rows
        # Index 0 is the transcript id, so the brain tissues (columns 8-20
        # of the table) sit at row[8:21]. Missing values are skipped.
        return [
            row for row in rows
            if any(value is not None and value >= 1 for value in row[8:21])
        ]

    def plot(self):
        """Return the plot as an HTML ``div`` string, or a no-data notice."""
        express_average = self.data_handling()
        if not express_average:
            return self.NO_DATA_HTML

        x_columns = self.get_columns()
        trace = []
        for item in express_average:
            trace.append(
                go.Scatter(
                    x=x_columns,
                    # Everything after the transcript id: one value per
                    # column, including the last tissue.
                    y=item[1:],
                    hoverinfo='all',
                    name=item[0],
                    # NOTE(review): 180 is not a calendar name; confirm the
                    # installed plotly version accepts this value.
                    ycalendar=180,
                    hoverlabel=dict(namelength=-1)))
        layouts = go.Layout(
            height=400,
            width=1200,
            hovermode='closest',
            # Space between the axis labels and the figure edges.
            margin=go.Margin(l=50, r=100, b=200, t=10, pad=0),
            xaxis=dict(
                showgrid=True,
                zeroline=False,
                showline=False,
                showticklabels=True,
                tickangle=90,
            ),
            yaxis=dict(autorange=True,
                       title='log<sub>2</sub> (TPM+1)',
                       titlefont=dict(family='Arial, sans-serif',
                                      size=18,
                                      color='lightgrey'),
                       showticklabels=True,
                       tickangle=90,
                       tickfont=dict(family='Old Standard TT, serif',
                                     size=14,
                                     color='black'),
                       exponentformat='e',
                       showexponent='All'))
        figs = go.Figure(data=trace, layout=layouts)
        return plotly.offline.plot(figs,
                                   show_link=False,
                                   output_type="div",
                                   include_plotlyjs=False)
def __init__(self, ID):
    """Keep the identifier and open the ``ProteinExpression`` collection.

    :param ID: identifier stored on ``self.ID``.
    """
    self.ID = ID
    collection = DBBase('ProteinExpression')
    self.db = collection
class ProteinExpressPlot(object):
    """Build a bar chart of per-tissue protein expression for one gene."""

    NO_DATA_HTML = ('<div>There is no corresponding data published yet, '
                    'we will update it when such data available. </div>')

    def __init__(self, ID):
        self.ID = ID
        self.db = DBBase('ProteinExpression')

    def get_express_value_from_db(self):
        """Return the record's ``ProteinExpressData``.

        Returns ``None`` when the gene has no record or the field is absent.
        """
        record = self.db.find_one_by_one_condition("ENTREZ_ID", self.ID)
        if record is None:
            return None
        return record.get('ProteinExpressData')

    def plot(self):
        """Return the chart as an HTML ``div`` string, or a no-data notice.

        Each bar carries a symmetric error bar of half the distance between
        the stored minimum and maximum intensity.
        """
        express_list = self.get_express_value_from_db()
        if not express_list:
            return self.NO_DATA_HTML

        tissues = []
        intensities = []
        half_ranges = []
        for item in express_list:
            tissues.append(item['TISSUE_NAME'])
            intensities.append(item['NORMALIZED_INTENSITY'])
            lowest = float(item['MIN_NORMALIZED_INTENSITY'])
            highest = float(item['MAX_NORMALIZED_INTENSITY'])
            half_ranges.append((highest - lowest) / 2)

        trace1 = go.Bar(
            x=tissues,
            y=intensities,
            error_y=dict(type='data', array=half_ranges, visible=True),
        )
        layout = go.Layout(
            paper_bgcolor='rgb(249, 249, 249)',
            plot_bgcolor='rgb(249, 249, 249)',
            barmode='group',
            height=500,
            width=600,
            title='<br>Median protein expression</br>',
            hovermode='closest',
            # Space between the axis labels and the figure edges.
            margin=go.Margin(l=70, r=30, b=100, t=10, pad=0),
            xaxis=dict(
                showgrid=True,
                zeroline=False,
                showline=False,
                showticklabels=True,
                tickangle=20,
            ),
            yaxis=dict(
                autorange=True,
                title='log <sub>10</sub> normalized iBAQ intensity',
                titlefont=dict(family='Arial, sans-serif',
                               size=18,
                               color='lightgrey'),
                showticklabels=True,
                tickangle=90,
                tickfont=dict(family='Old Standard TT, serif',
                              size=14,
                              color='black'),
                exponentformat='e',
                showexponent='All',
            ),
        )
        fig = go.Figure(data=[trace1], layout=layout)
        return plotly.offline.plot(fig,
                                   show_link=False,
                                   output_type="div",
                                   include_plotlyjs=False)
def __init__(self, ID, transcript):
    """Keep the identifier, open the collection and normalise *transcript*.

    Every entry of *transcript* is cut at its first ``.``. The list is
    rewritten in place, so the caller's list changes too, and the same list
    object is stored on ``self.transcript_list``.
    """
    self.ID = ID
    self.db = DBBase('Gtex_Trans_AllTissue_Pledged')
    for position, versioned_id in enumerate(transcript):
        base_id, _, _ = versioned_id.partition(".")
        transcript[position] = base_id
    self.transcript_list = transcript
class GetBrainExpressTranscript(object):
    """Select the requested transcripts of a gene that are expressed in brain.

    Stored values are transformed with ``log2(x + 1)`` and rounded to two
    decimals; a transcript counts as brain-expressed when any brain-tissue
    value is ``>= 1`` after that transformation.
    """

    # Column order matters: data_handling() locates the brain tissues by
    # position in this sequence.
    _TISSUES = (
        'Adipose - Subcutaneous', 'Adipose - Visceral (Omentum)',
        'Adrenal Gland', 'Artery - Aorta', 'Artery - Coronary',
        'Artery - Tibial', 'Bladder', 'Brain - Amygdala',
        'Brain - Anterior cingulate cortex (BA24)',
        'Brain - Caudate (basal ganglia)', 'Brain - Cerebellar Hemisphere',
        'Brain - Cerebellum', 'Brain - Cortex',
        'Brain - Frontal Cortex (BA9)', 'Brain - Hippocampus',
        'Brain - Hypothalamus', 'Brain - Nucleus accumbens (basal ganglia)',
        'Brain - Putamen (basal ganglia)',
        'Brain - Spinal cord (cervical c-1)', 'Brain - Substantia nigra',
        'Breast - Mammary Tissue', 'Cells - EBV-transformed lymphocytes',
        'Cells - Transformed fibroblasts', 'Cervix - Ectocervix',
        'Cervix - Endocervix', 'Colon - Sigmoid', 'Colon - Transverse',
        'Esophagus - Gastroesophageal Junction', 'Esophagus - Mucosa',
        'Esophagus - Muscularis', 'Fallopian Tube',
        'Heart - Atrial Appendage', 'Heart - Left Ventricle',
        'Kidney - Cortex', 'Liver', 'Lung', 'Minor Salivary Gland',
        'Muscle - Skeletal', 'Nerve - Tibial', 'Ovary', 'Pancreas',
        'Pituitary', 'Prostate', 'Skin - Not Sun Exposed (Suprapubic)',
        'Skin - Sun Exposed (Lower leg)', 'Small Intestine - Terminal Ileum',
        'Spleen', 'Stomach', 'Testis', 'Thyroid', 'Uterus', 'V****a',
        'Whole Blood',
    )

    def __init__(self, ID, transcript):
        """Store the identifier and the normalised transcript list.

        Every entry of *transcript* is cut at its first ``.``. The list is
        rewritten in place (the caller's list changes too) and kept on
        ``self.transcript_list``.
        """
        self.ID = ID
        self.db = DBBase('Gtex_Trans_AllTissue_Pledged')
        for index, item in enumerate(transcript):
            transcript[index] = item.split(".")[0]
        self.transcript_list = transcript

    def get_express_value_from_db(self):
        """Return the stored rows for ``self.ID``."""
        return self.db.find_count_by_one_condition("Entrez_id", self.ID)

    def get_columns(self):
        """Return a fresh list of all tissue column names, in table order."""
        return list(self._TISSUES)

    def get_brain_columns(self):
        """Return the tissue column names that start with ``'Brain - '``."""
        return [name for name in self._TISSUES if name.startswith('Brain - ')]

    def l2(self, x):
        """Return ``log2(x + 1)`` of *x* coerced to float; 0 maps to 0."""
        x = float(x)
        if x != 0:
            return math.log2(x + 1)
        return 0

    def r2(self, x):
        """Round *x* to two decimal places."""
        return round(x, 2)

    def average(self, num_list):
        """Return the rounded ``log2(mean + 1)`` of *num_list*.

        :raises ZeroDivisionError: if *num_list* is empty.
        """
        values = [float(item) for item in num_list]
        return self.r2(self.l2(float(sum(values)) / len(values)))

    def data_handling(self):
        """Return ``(transcript_ids, rows)`` for the selected transcripts.

        A transcript is selected when it has a brain-tissue value ``>= 1``
        and its id, cut at the first ``.``, is in ``self.transcript_list``.
        Each row is ``[transcript_id, v1, v2, ...]`` with one value per
        column and ``None`` for missing values. Both lists are empty when
        nothing was fetched.
        """
        final_transcript = []
        express_average = []
        for item_trans in self.get_express_value_from_db() or []:
            row = [item_trans.get("transcript_id")]
            for column in self.get_columns():
                value = item_trans.get(column)
                row.append(None if value is None else self.r2(self.l2(value)))

            # Index 0 is the transcript id, so the brain tissues (columns
            # 8-20 of the table) sit at row[8:21]. Missing values are skipped.
            brain_expressed = any(
                value is not None and value >= 1 for value in row[8:21])
            transcript_id = row[0]
            if (brain_expressed and transcript_id is not None
                    and transcript_id.split(".")[0] in self.transcript_list):
                express_average.append(row)
                final_transcript.append(transcript_id)
        return final_transcript, express_average
def __init__(self, url_information):
    """Keep the raw query string and open the ``Genes`` collection.

    :param url_information: stored unchanged on ``self.url_information``.
    """
    self.url_information = url_information
    collection = DBBase('Genes')
    self.db = collection
def __init__(self, ID, mutation):
    """Keep the identifier, open ``Genes`` and wrap *mutation* in a query.

    :param ID: identifier stored on ``self.id``.
    :param mutation: passed to ``MutationQuery``; the resulting object is
        stored on ``self.mutation_query``.
    """
    self.id = ID
    collection = DBBase('Genes')
    self.db = collection
    query = MutationQuery(mutation)
    self.mutation_query = query
class MutationTrancript(object):
    """Find which transcripts (and isoforms) of a gene a mutation falls in."""

    def __init__(self, ID, mutation):
        self.id = ID
        self.db = DBBase('Genes')
        self.mutation_query = MutationQuery(mutation)

    def get_all_tarncript(self):
        """Return the gene's ``Trans_Gencode`` entries as a list.

        Returns ``[]`` when the gene has no record or no such field.
        """
        gene_data = self.db.find_one_by_one_condition("ENTREZ_ID", self.id)
        if gene_data is None:
            return []
        return list(gene_data.get("Trans_Gencode") or [])

    def judge_mutation_position_on_trancript_exon(self,
                                                  position,
                                                  transcriot_exon,
                                                  type=None):
        """Return ``True`` when *position* lies inside any listed exon.

        :param position: coordinate, anything ``int()`` accepts.
        :param transcriot_exon: comma-separated ``start_end`` ranges;
            double quotes around the numbers are ignored.
        :param type: mutation type; when it starts with ``"splice-site"``
            every exon is widened by 10 on both sides. ``None`` means no
            widening.
        :raises ValueError: if a coordinate is not an integer.
        """
        is_splice_site = isinstance(type, str) and type.startswith(
            "splice-site")
        padding = 10 if is_splice_site else 0
        target = int(position)
        for exon in transcriot_exon.split(","):
            if not exon.strip():
                # Tolerate empty entries such as a trailing comma.
                continue
            bounds = exon.replace('"', '').split("_")
            start, end = int(bounds[0]), int(bounds[1])
            if start - padding <= target <= end + padding:
                return True
        return False

    def get_mutation_impact_transcript(self):
        """Return the ``TranscriptID`` of every transcript hit by the mutation.

        A transcript is hit when it is on the mutation's chromosome (with or
        without a ``chr`` prefix) and the position falls inside one of its
        coding exons. Each examined transcript dict gets a boolean ``flag``
        entry. Returns ``[]`` when the mutation is not found or has no
        position.
        """
        all_transcript = self.get_all_tarncript()
        mutation = self.mutation_query.get_mutation()
        if mutation is None:
            return []

        mutation_type = mutation.get("Exonic Func")
        mutation_position = mutation.get("Position")
        mutation_chr = mutation.get("chr")
        if mutation_position is None:
            return []

        result = []
        for item in all_transcript:
            region = item.get('coding_exon_region')
            chrom = item.get("chrom")
            same_chromosome = chrom is not None and mutation_chr in (
                chrom.replace("chr", ""), chrom)
            item['flag'] = bool(
                region is not None and region != 'NA' and same_chromosome
                and self.judge_mutation_position_on_trancript_exon(
                    mutation_position, region, mutation_type))
            if item['flag']:
                result.append(item.get("TranscriptID"))
        return result

    def get_mutation_impact_isoforms(self):
        """Return the isoform of every hit transcript that has one."""
        isoform_lookup = GetIsoformAndEnst(self.id)
        isoform_result = []
        for transcript_id in self.get_mutation_impact_transcript():
            # One lookup per transcript; each lookup queries the database.
            isoform = isoform_lookup.get_isoform(transcript_id)
            if isoform is not None:
                isoform_result.append(isoform)
        return isoform_result
class ProteinExpressPlot(object):
    """Build a horizontal bar chart of per-tissue protein expression.

    One bar series is drawn per selected UniProt entry of the gene.
    """

    NO_DATA_HTML = ('<div>There is no corresponding data published yet, '
                    'we will update it when such data available. </div>')

    # Tissue names used to decide brain expression and shown in bold.
    BRAIN_TISSUES = ("brain", 'arachnoid cyst', 'cerebral cortex',
                     'cerebrospinal fluid', 'prefrontal cortex',
                     'spinal cord')

    def __init__(self, ID, isoform_list="all"):
        self.ID = ID
        self.db = DBBase('ProteinExpress_Update')
        self.isoform_list = isoform_list

    @staticmethod
    def _tissue_results(record):
        """Return ``record['Express_Value']['d']['results']`` or ``[]``."""
        express_value = record.get('Express_Value') or {}
        return (express_value.get('d') or {}).get('results') or []

    def _is_brain_expressed(self, record, threshold):
        """Return ``True`` if any brain tissue reaches *threshold*."""
        return any(
            tissue.get('TISSUE_NAME') in self.BRAIN_TISSUES
            and float(tissue.get('NORMALIZED_INTENSITY')) >= threshold
            for tissue in self._tissue_results(record))

    def _is_selected(self, record):
        """Apply the selection rule for the current ``isoform_list``.

        ``"all"``: the id contains ``-`` and a brain tissue is ``>= 1``.
        Otherwise: a brain tissue is ``>= 0.5`` and the id is in
        ``isoform_list``. Records without an id are never selected.
        """
        uniprot_id = record.get('Uniprot_ID')
        if uniprot_id is None:
            return False
        if self.isoform_list == "all":
            return '-' in uniprot_id and self._is_brain_expressed(record, 1)
        return (self._is_brain_expressed(record, 0.5)
                and uniprot_id in self.isoform_list)

    def get_express_value_from_db(self):
        """Return ``(x_list, y_list, array_list, id_list)`` for the chart.

        The four lists are parallel, one entry per selected record: tissue
        names, stored intensities, half of (max - min) per tissue, and the
        record's ``Uniprot_ID``.
        """
        x_list = []
        y_list = []
        array_list = []
        id_list = []
        for record in self.db.find_count_by_one_condition(
                "Entrez_ID", self.ID):
            if not self._is_selected(record):
                continue
            tissues = []
            intensities = []
            half_ranges = []
            for tissue in self._tissue_results(record):
                tissues.append(tissue['TISSUE_NAME'])
                intensities.append(tissue['NORMALIZED_INTENSITY'])
                lowest = float(tissue['MIN_NORMALIZED_INTENSITY'])
                highest = float(tissue['MAX_NORMALIZED_INTENSITY'])
                half_ranges.append((highest - lowest) / 2)
            id_list.append(record.get("Uniprot_ID"))
            x_list.append(tissues)
            y_list.append(intensities)
            array_list.append(half_ranges)
        return x_list, y_list, array_list, id_list

    def get_weight(self, w):
        """Map a bar count *w* to a figure height between 200 and 800."""
        if w <= 10:
            return 200
        if w <= 30:
            return w * 20 + 200
        return 800

    def plot(self):
        """Return the chart as an HTML ``div`` string, or a no-data notice."""
        x_list, y_list, array_list, id_list = self.get_express_value_from_db()
        if len(x_list) != len(y_list) or not x_list:
            return self.NO_DATA_HTML

        trace = []
        for x_item, y_item, array_item, id_item in zip(
                x_list, y_list, array_list, id_list):
            # Brain tissues are emphasised in the axis labels.
            labels = [
                "<b>" + name + " </b>"
                if name in self.BRAIN_TISSUES else name + ' '
                for name in x_item
            ]
            trace.append(
                go.Bar(x=y_item,
                       y=labels,
                       name=id_item,
                       orientation='h',
                       hoverinfo='all',
                       error_x=dict(type='data',
                                    array=array_item,
                                    visible=True)))
        l_height = self.get_weight(len(x_list[0]) * len(id_list))
        layout = go.Layout(
            paper_bgcolor='rgb(249, 249, 249)',
            plot_bgcolor='rgb(249, 249, 249)',
            barmode='stack',
            height=l_height,
            width=800,
            title='<br>Median protein expression</br>',
            hovermode='closest',
            # Space between the axis labels and the figure edges.
            margin=go.Margin(l=250, r=120, b=80, t=40, pad=0),
            xaxis=dict(
                title='log <sub>10</sub> normalized iBAQ intensity',
                titlefont=dict(
                    family='Arial, sans-serif',
                    size=18,
                ),
                showgrid=True,
                zeroline=True,
                showline=True,
                showticklabels=True,
            ),
            yaxis=dict(
                autorange=True,
                showticklabels=True,
                exponentformat='e',
                showexponent='All'))
        fig = go.Figure(data=trace, layout=layout)
        return plotly.offline.plot(fig,
                                   show_link=False,
                                   output_type="div",
                                   include_plotlyjs=False)