def significance(self): gene = request.args.get('gene') species = request.args.get('species') # tt = request.args.get('ThresholdThreshold') try: genes = gene.split(',') except: raise InvalidUsage('not found gene', 400) result = {'data1': [], 'data2': [], 'data3': []} if not gene or not species: return make_dict_response(data=result) if species == 'disease': gds = db.session.query(GeneDiseaseSignificance).filter( GeneDiseaseSignificance.gene.in_(genes)).all() result['data2'] = [g.to_json() for g in gds] if species == 'tissue': tgs = db.session.query(TissueGeneSignificance).filter( TissueGeneSignificance.gene_symbol.in_(genes)).all() result['data2'] = [g.to_json() for g in tgs] return make_dict_response(data=result)
def list(self): disease_name = request.args.get('name') if not disease_name: return make_dict_response({'gene': []}) gwas_disease = db.session.query(GeneticGWAS.disease).distinct().filter( GeneticGWAS.disease.ilike("%" + disease_name + "%")) omim_disease = db.session.query(GeneticOMIM.disease).distinct().filter( GeneticOMIM.disease.ilike("%" + disease_name + "%")) disease_query = gwas_disease.union_all(omim_disease).distinct().limit( 20) return make_dict_response(data=[{ 'id': x.disease, 'name': x.disease } for x in disease_query])
def single_cell_list(self): cell_file_name = request.args.get('name') cell_file_name = cell_colnum_mapping()[cell_file_name] if cell_file_name: cluster_list = db.session.query( OutCell.cluster, OutCell.labels).filter_by( cell_type=cell_file_name).distinct().all() cluster_list = sorted(cluster_list, key=lambda x: int(x[0])) rs_cluster = [ f'cluster:{cluster}:{labels}' for cluster, labels in cluster_list ] return make_dict_response(data=rs_cluster) else: return make_dict_response(data=[])
def get(self): variant_id = request.args.get('name') if variant_id is None: raise InvalidUsage('incomplete', 400) summary = db.session.query(Summary).filter_by(snpid=variant_id).first() causality = db.session.query(Causality).filter_by(variant=variant_id).all() gnomad = db.session.query(Gnomad).filter_by(variant=variant_id).first() gnomad_data_value = [] gnomad_data_text = [] gnomad_data_key = [] if gnomad: for k, v in gnomad.territory.items(): gnomad_data_value.append(float(v) * 100) gnomad_data_text.append(f'{v}%') gnomad_data_key.append(area[k]) response = { 'summary': None if not summary else summary.to_json(), 'causality': [ex.to_json() for ex in causality], 'gnomad': { 'y': gnomad_data_key, 'x': gnomad_data_value, 'text': gnomad_data_text, }, } return make_dict_response(response)
def gene_and_expression(self): gene_id = request.args.get('gene_id') if not gene_id: raise InvalidUsage('gene_id not provided') gene = db.session.query(Gene).filter_by(gene=gene_id).first() return make_dict_response(gene=None if not gene else gene.to_json())
def get(self): gene_name = request.args.get('name') if not gene_name: raise InvalidUsage('name not provided', 400) gene = db.session.query(Gene).filter_by( symbol=gene_name).first_or_404() return make_dict_response(data=gene.to_json())
def tracks(self): colors = itertools.cycle(cl.scales['4']['div']['RdYlBu']) results = db.session.query(Epigenomics).order_by( Epigenomics.disease.asc(), Epigenomics.data_type.asc()).all() ref_track = AppConfig.get_config_value('igv.ref.hg38') epigenomics = [x.to_json(color=next(colors)) for x in results] return make_dict_response(tracks=epigenomics, reference=ref_track)
def datasets(self): result = [ { 'key': 'microarray_tissue', 'name': 'Human (Microarray) - Tissue', 'options': list(ExpressionMicroarrayTissue.column_mappings().values()), 'example': 'AADAC,AAK1,AANAT,AASDHPPT,AASS,AATF,ABCA1,ABCA8,ABCB1,ABCB11', }, { 'key': 'microarray_disease', 'name': 'Human (Microarray) - Disease', 'options': list(ExpressionMicroarrayDisease.column_mappings().values()), 'example': 'AADAC,AAK1,AANAT,AASDHPPT,AASS,AATF,ABCA1,ABCA8,ABCB1,ABCB11', }, { 'key': 'rnaseq_tissue', 'name': 'Human (RNASeq) - Tissue', 'options': list(ExpressionRNASeqTissue.column_mappings().values()), 'example': 'OR4F5,SAMD11,NOC2L,KLHL17,PLEKHN1,C1orf170,HES4,ISG15,AGRN,RNF223', }, { 'key': 'rnaseq_disease', 'name': 'Human (RNASeq) - Disease', 'options': list(ExpressionRNASeqDisease.column_mappings().values()), 'example': 'OR4F5,SAMD11,NOC2L,KLHL17,PLEKHN1,C1orf170,HES4,ISG15,AGRN,RNF223', }, { 'key': 'mouse', 'name': 'Mouse', 'options': list(ExpressionMouse.column_mappings().values()), 'example': 'Zfp92,Zfp91-cntf,Zfp91,Zfp90,Zfp9,Zfp87,Zfp85-rs1,Zfp84', }, ] # return make_dict_response(data=[]) return make_dict_response(data=result)
def gene_disease_network(self): keywords = split_params(request.args.get('keywords', '')) if not keywords: raise InvalidUsage('No keywords provided') sql = text(""" SELECT gene, disease, weight FROM gene_network_disease WHERE gene IN :keywords or disease in :keywords; """) df = pd.read_sql(sql, db.engine, params={'keywords': tuple(keywords)}) if len(df) == 0: raise InvalidUsage('No data points found') df = df.groupby(['gene', 'disease']).agg({ 'weight': 'max' }).reset_index().copy() weight_max = df['weight'].max() if weight_max > 0: df['weight'] = df['weight'] / weight_max df.loc[df['weight'] == 0, 'weight'] = df['weight'][df['weight'] > 0].min() * 0.1 else: df['weight'] = 1 unique_genes = df['gene'].drop_duplicates() unique_diseases = df['disease'].drop_duplicates() gene_mappings = {} disease_mappings = {} nodes = [] node_counter = 0 for g in unique_genes: nodes.append({'node': node_counter, 'name': g}) gene_mappings[g] = node_counter node_counter = node_counter + 1 for d in unique_diseases: nodes.append({'node': node_counter, 'name': d}) disease_mappings[d] = node_counter node_counter = node_counter + 1 links = [] for _, row in df.iterrows(): links.append({ 'source': gene_mappings[row['gene']], 'target': disease_mappings[row['disease']], 'value': row['weight'] }) node_links = {'nodes': nodes, 'links': links} return make_dict_response(data=node_links)
def gene_expression_violin(self): gene_name = request.args.get('name') if not gene_name: raise InvalidUsage('name not provided') genes = db.session.query(GeneExpression).filter_by( gene=gene_name).all() response = { 'gene': [gene.to_json() for gene in genes], 'disease': sorted(set(x.disease for x in genes)), } return make_dict_response(response)
def analysis_go(self): disease = request.args.get('disease') gos_query = db.session.query(DiseaseGo).filter_by(disease=disease) bps = gos_query.filter_by(dataset='BP').all() ccs = gos_query.filter_by(dataset='CC').all() mfs = gos_query.filter_by(dataset='MF').all() keggs = gos_query.filter_by(dataset='KEGG').all() result = { 'bp': [bp.to_json() for bp in bps], 'cc': [cc.to_json() for cc in ccs], 'mf': [mf.to_json() for mf in mfs], 'kegg': [kegg.to_json() for kegg in keggs], } return make_dict_response(go=result)
def total(self): gene = db.session.query(Gene.symbol).distinct() variant = db.session.query(Summary.snpid).distinct() gwas_disease = db.session.query(GeneticGWAS.disease).distinct() omim_disease = db.session.query(GeneticOMIM.disease).distinct() disease_count = gwas_disease.union_all(omim_disease).distinct() response = { 'gene': [i[0] for i in gene], 'variant': [i[0] for i in variant], 'disease': [i[0] for i in disease_count] } return make_dict_response(data=response)
def get(self): cluster = request.args.get('cluster') cell_file_name = request.args.get('name') cell_file_name = cell_colnum_mapping()[cell_file_name] cell_tsne1 = [] cell_tsne2 = [] cell_group = [] if cluster: cluster_columns = [cl.split(':')[1] for cl in cluster.split(',')] label_columns = [cl.split(':')[2] for cl in cluster.split(',')] if label_columns.count('None') > 0: rs_data = db.session.query(OutCell).filter_by( cell_type=cell_file_name).filter( OutCell.cluster.in_(cluster_columns)).all() else: rs_data = db.session.query(OutCell).filter_by( cell_type=cell_file_name).filter( and_(OutCell.cluster.in_(cluster_columns), OutCell.labels.in_(label_columns))).all() rs_x = list( db.session.query(func.min(OutCell.xaxis), func.max(OutCell.xaxis)).filter_by( cell_type=cell_file_name).one()) rs_y = list( db.session.query(func.min(OutCell.yaxis), func.max(OutCell.yaxis)).filter_by( cell_type=cell_file_name).one()) rs_data.sort(key=lambda x: int(x.cluster)) for item in rs_data: cell_tsne1.append(item.xaxis) cell_tsne2.append(item.yaxis) cell_group.append(f'cluster:{item.cluster}:{item.labels}') response = { 'x': cell_tsne1, 'y': cell_tsne2, 'group': cell_group, 'x_range': [rs_x[0] - 3, rs_x[1] + 3], 'y_range': [rs_y[0] - 3, rs_y[1] + 3], } return make_dict_response(response)
def list(self): epigenomics = db.session.query( Epigenomics.disease, Epigenomics.data_type).distinct().order_by( Epigenomics.disease.asc(), Epigenomics.data_type.asc()).all() results = defaultdict(set) for disease, data_type in epigenomics: results[disease].add(data_type) return make_dict_response(diseases=sorted(results.keys()), diseases_types={ d: [ dict(type=x, disease=d, data_key=d + '/' + x) for x in sorted(v) ] for d, v in results.items() })
def omim(self): disease = request.args.get('dis') page = request.args.get('page', 1, type=int) number = request.args.get('number', 10, type=int) if not disease: raise InvalidUsage('传递的信息不全!') query = db.session.query(GeneticOMIM).filter_by(disease=disease) paginate = query.paginate(page, number, False) response = { "data": [r.to_json_disease() for r in query], "pagination": { 'total': paginate.total, 'page': page, 'number': number, 'total_pages': paginate.pages, }, } return make_dict_response(response)
def get_single_cell_data(self): name = request.args.get('name') page = request.args.get('page') number = request.args.get('number') name = cell_colnum_mapping()[name] page = 1 if not page else int(page) number = 10 if not number else int(number) if not name: raise InvalidUsage('传递的信息不全!', 400) cell_name = db.session.query(MarkersCell).filter_by(cell_type=name) paginate = cell_name.paginate(page, number, False) data = [] if cell_name: for cn in cell_name: cell_data = { "gene": cn.gene, "cluster": cn.cluster, "annotation": cn.labels, "p_val": cn.p_val, "avg_logFC": cn.avg, "pct1": cn.pct1, "pct2": cn.pct2, } data.append(cell_data) response = { "data": data, "pagination": { 'total': paginate.total, 'page': page, 'number': number, 'total_pages': paginate.pages, }, } return make_dict_response(response)
def statistic(self): gene_count = db.session.query(Gene.symbol).count() gwas_disease = db.session.query(GeneticGWAS.disease).distinct() omim_disease = db.session.query(GeneticOMIM.disease).distinct() disease_count = gwas_disease.union_all(omim_disease).distinct().count() gwas_variants = db.session.query(GeneticGWAS.variant).distinct() omim_variants = db.session.query(GeneticOMIM.variant).distinct() variants_count = gwas_variants.union_all( omim_variants).distinct().count() gwas_study = db.session.query(GeneticGWAS.study).distinct() omim_study = db.session.query(GeneticOMIM.publications).distinct() study_count = gwas_study.union_all(omim_study).distinct().count() return make_dict_response( data={ 'gene': gene_count, 'disease': disease_count, 'snp': variants_count, 'studies': study_count })
def analysis_co(self): gene = request.args.get('gene') species = json.loads(request.args.get('species')) tt = request.args.get('ThresholdThreshold') tn = request.args.get('Threshold_numbwe') try: genes = gene.split(',') except: raise InvalidUsage('not found gene', 400) result = { 'data1': [], 'data2': [], } if not genes or not species['microarray_disease'] and not species[ 'microarray_tissue']: return make_dict_response(data=result) if species['microarray_disease']: db_name = mapping_disease_tissue()[species['microarray_disease'] [0]] gene_disease_query = db.session.query(db_name).filter( or_(db_name.gene.in_(genes), db_name.contrast_gene.in_(genes))) gds = db.session.query(GeneDiseaseCorrect).filter( and_( GeneDiseaseCorrect.gene.in_(genes), GeneDiseaseCorrect.disease.in_( species['microarray_disease']))) if tt: # 0.1 gene_disease_query = gene_disease_query.filter( or_(db_name.weight > float(tt), db_name.weight < -float(tt))) gds = gds.filter( or_(GeneDiseaseCorrect.weight > float(tt), GeneDiseaseCorrect.weight < -float(tt))) if tn: gene_disease_query = gene_disease_query.limit(float(tn)) gds = gds.limit(float(tn)) result['data1'] = [ g.to_json() if g.contrast_gene not in genes else g.to_reverse_json() for g in gene_disease_query.all() ] result['data2'] = [g.to_json() for g in gds.all()] if species['microarray_tissue']: db_name = mapping_disease_tissue()[species['microarray_tissue'][0]] gene_disease_query = db.session.query(db_name).filter( or_(db_name.gene.in_(genes), db_name.contrast_gene.in_(genes))) tgs = db.session.query(GeneTissueCorrect).filter( and_( GeneTissueCorrect.gene.in_(genes), GeneTissueCorrect.tissue.in_( species['microarray_tissue']))) if tt: gene_disease_query = gene_disease_query.filter( or_(db_name.weight > float(tt), db_name.weight < -float(tt))) tgs = tgs.filter( or_(GeneTissueCorrect.weight > float(tt), GeneTissueCorrect.weight < -float(tt))) if tn: gene_disease_query = gene_disease_query.limit(float(tn)) tgs = tgs.limit(float(tn)) result['data1'] = [ g.to_json() if g.contrast_gene not in genes else g.to_reverse_json() for g in gene_disease_query.all() ] result['data2'] = [g.to_json() for g in tgs.all()] return make_dict_response(data=result)
def internal_server_error(e): return make_dict_response({ "code": '500', "title": '500 Internal Server Error', "message": f'Internal Server Error: {e.description}', }, status_code=500)
def alteration(self): ep_alt = db.session.query(EpigeneticAlteration).all() data = [i.to_json() for i in ep_alt] return make_dict_response(data=data)
def heatmap_data(self): import matplotlib.pyplot as plt _type = request.args.get('type') platform = request.args.get('platform') genes = split_params(request.args.get('gene')) if _type == 'mouse': datasets = split_params(request.args.get('tissue')) # if len(datasets) < 2: # raise InvalidUsage('Too few data sets', 400) table_model = self._get_table_name(_type) query = db.session.query(table_model).filter( table_model.gene.in_(genes)) df = pd.read_sql(query.statement, db.engine, index_col='gene') df = df.rename(columns=table_model.column_mappings()) df = df[datasets] else: types = platform + '_' + 'tissue' datasets1 = split_params(request.args.get('tissue')) table_model = self._get_table_name(types) query = db.session.query(table_model).filter( table_model.gene.in_(genes)).order_by(asc(table_model.gene)) df1 = pd.read_sql(query.statement, db.engine, index_col='gene') df1 = df1.rename(columns=table_model.column_mappings()) df1 = df1[datasets1] types = platform + '_' + 'disease' datasets2 = split_params(request.args.get('disease')) table_model = self._get_table_name(types) query = db.session.query(table_model).filter( table_model.gene.in_(genes)).order_by(asc(table_model.gene)) df2 = pd.read_sql(query.statement, db.engine, index_col='gene') df2 = df2.rename(columns=table_model.column_mappings()) df2 = df2[datasets2] # if len(datasets1 + datasets2) < 2: # raise InvalidUsage('Too few data sets', 400) df = concat([df1, df2], join="inner", axis=1) # print(df) df.index.name = None if len(df) == 0: empty_df = df.reset_index() return make_dict_response( fig=None, data=empty_df.to_dict('records'), columns=empty_df.columns.tolist(), ) # cellSizePixels = 75 # dpi = matplotlib.rcParams['figure.dpi'] # marginWidth = matplotlib.rcParams['figure.subplot.right']-matplotlib.rcParams['figure.subplot.left'] # marginHeight = matplotlib.rcParams['figure.subplot.top']-matplotlib.rcParams['figure.subplot.bottom'] # Ny,Nx = df.shape # figWidth = (Nx*cellSizePixels/dpi)/0.8/marginWidth # figHeigh = (Ny*cellSizePixels/dpi)/0.8/marginHeight row_cluster = df.shape[0] > 1 col_cluster = df.shape[1] > 1 df = df.fillna(0) sns.set(font_scale=1.2) cluster = sns.clustermap(df, linewidths=1, cmap="mako", vmin=0, vmax=10, row_cluster=row_cluster, col_cluster=col_cluster) # axWidth = (Nx*cellSizePixels)/(figWidth*dpi) # axHeight = (Ny*cellSizePixels)/(figHeigh*dpi) # # # resize heatmap # ax_heatmap_orig_pos = cluster.ax_heatmap.get_position() # cluster.ax_heatmap.set_position([ax_heatmap_orig_pos.x0, ax_heatmap_orig_pos.y0, # axWidth, axHeight]) # # # resize dendrograms to match # ax_row_orig_pos = cluster.ax_row_dendrogram.get_position() # cluster.ax_row_dendrogram.set_position([ax_row_orig_pos.x0, ax_row_orig_pos.y0, # ax_row_orig_pos.width, axHeight]) # ax_col_orig_pos = cluster.ax_col_dendrogram.get_position() # cluster.ax_col_dendrogram.set_position([ax_col_orig_pos.x0, ax_heatmap_orig_pos.y0+axHeight, # axWidth, ax_col_orig_pos.height]) plt.setp(cluster.ax_heatmap.xaxis.get_majorticklabels(), rotation=45, horizontalalignment='right') img = io.BytesIO() cluster.savefig(img, format='png') img.seek(0) fig = base64.b64encode(img.getvalue()).decode('utf-8') # Safari fix: https://stackoverflow.com/questions/27396376/base64-image-tag-in-safari-did-not-showed-up # pad_num = 4 - (len(fig) % 4) # if pad_num < 4: # fig = fig + ('=' * pad_num) cluster_data = cluster.data2d.round(4).reset_index() return make_dict_response( fig='data:image/png;base64,' + fig, data=cluster_data.to_dict('records'), columns=cluster_data.columns.tolist(), )