def test_is_arabidopsis_gene_valid(self):
    """Exercise BARUtils.is_arabidopsis_gene_valid with accepted and rejected ids."""
    # Valid gene: a plain id, and the same id with a '.1' suffix
    for gene in ('At1g01010', 'At1g01010.1'):
        self.assertTrue(BARUtils.is_arabidopsis_gene_valid(gene))

    # Invalid gene: arbitrary text, and the id with a '.11' suffix
    for gene in ('abc', 'At1g01010.11'):
        self.assertFalse(BARUtils.is_arabidopsis_gene_valid(gene))
def test_is_arabidopsis_gene_valid(self):
    """Verify which ids BARUtils.is_arabidopsis_gene_valid accepts and rejects."""
    accepted_ids = ["At1g01010", "At1g01010.1"]
    rejected_ids = ["abc", "At1g01010.11"]

    # Valid gene
    for gene_id in accepted_ids:
        outcome = BARUtils.is_arabidopsis_gene_valid(gene_id)
        self.assertTrue(outcome)

    # Invalid gene
    for gene_id in rejected_ids:
        outcome = BARUtils.is_arabidopsis_gene_valid(gene_id)
        self.assertFalse(outcome)
def get(self, gene_id=""):
    """This end point retrieves publications from ThaleMine given an AGI ID.

    The id is escaped and validated as an Arabidopsis gene id; an invalid id
    yields the BARUtils error payload with status 400. Otherwise the decoded
    JSON body of the ThaleMine query service response is returned.
    """
    gene_id = escape(gene_id)

    # Is data valid
    if not BARUtils.is_arabidopsis_gene_valid(gene_id):
        return BARUtils.error_exit("Invalid gene id"), 400

    # Columns requested from Gene.publications, space separated inside view="..."
    publication_fields = (
        "firstAuthor",
        "issue",
        "journal",
        "pages",
        "pubMedId",
        "title",
        "volume",
        "year",
    )
    view_columns = " ".join(
        "Gene.publications." + field for field in publication_fields
    )

    # The gene id is substituted with str.format (not "+") so the XML text is
    # never combined with the escaped value through string concatenation.
    query_template = (
        '<query name="" model="genomic" view="' + view_columns + '" longDescription="" '
        'sortOrder="Gene.publications.firstAuthor asc">'
        '<constraint path="Gene.primaryIdentifier" op="=" value="{}"/></query> '
    )
    query = query_template.format(gene_id)

    # Now query the web service
    thalemine_response = requests.post(
        "https://bar.utoronto.ca/thalemine/service/query/results",
        data={"format": "json", "query": query},
        headers=request_headers,
    )
    return thalemine_response.json()
def get(self, species='', gene_id=''):
    """This end point provides gene aliases given a gene ID.

    Only the species 'arabidopsis' is handled. Returns the BARUtils success
    payload holding the list of aliases, or a BARUtils error payload (with
    status 400 for an invalid gene id and 500 for a database error).
    """
    # Escape input
    species = escape(species)
    gene_id = escape(gene_id)

    # Reject unsupported species and malformed ids before touching the database
    if species != 'arabidopsis':
        return BARUtils.error_exit('No data for the given species')
    if not BARUtils.is_arabidopsis_gene_valid(gene_id):
        return BARUtils.error_exit('Invalid gene id'), 400

    try:
        rows = AgiAlias.query.filter_by(agi=gene_id).all()
    except OperationalError:
        return BARUtils.error_exit('An internal error has occurred'), 500

    aliases = [row.alias for row in rows]

    # Return results if there are data
    if aliases:
        return BARUtils.success_exit(aliases)
    return BARUtils.error_exit('There are no data found for the given gene')
def get(self, table_id, sample, gene):
    """Returns the value for a given gene and sample.

    If no sample is given (``sample`` is the empty string) returns all values
    for that gene as a dict mapping ``str(row.Sample)`` to ``float(row.Value)``;
    otherwise returns the list of ``row.Value`` for the matching rows.

    The API key is read from the ``X-Api-Key`` request header and passed to
    ``SummarizationGeneExpressionUtils.decrement_uses``; a falsy result is
    reported as "Invalid API key".

    :param table_id: identifier handed to ``get_table_object``
    :param sample: sample name, or "" for every sample
    :param gene: gene id; must pass ``BARUtils.is_arabidopsis_gene_valid``
    :return: a BARUtils success/error payload, paired with a status code
        (400 or 500) on the invalid-gene and database-error paths
    """
    if not BARUtils.is_arabidopsis_gene_valid(gene):
        # A rejected id is a failure, so it is reported through error_exit
        # (it used to go through success_exit alongside the 400 status).
        return BARUtils.error_exit("Invalid gene ID"), 400

    key = request.headers.get("X-Api-Key")
    if not SummarizationGeneExpressionUtils.decrement_uses(key):
        return BARUtils.error_exit("Invalid API key")

    con = db.get_engine(bind="summarization")
    tbl = SummarizationGeneExpressionUtils.get_table_object(table_id)

    # NOTE(review): in SQLAlchemy releases before 1.4 the first positional
    # argument of Table.select() is a WHERE clause, not a column list. Confirm
    # that passing tbl.c.Value selects what is intended (row.Sample is read
    # from the result below).
    if sample == "":
        try:
            rows = con.execute(
                tbl.select(tbl.c.Value).where(tbl.c.Gene == gene))
        except SQLAlchemyError:
            return BARUtils.error_exit("Internal server error"), 500
        values = {str(row.Sample): float(row.Value) for row in rows}
    else:
        try:
            rows = con.execute(
                tbl.select(tbl.c.Value).where(
                    tbl.c.Sample == sample).where(tbl.c.Gene == gene))
        except SQLAlchemyError:
            return BARUtils.error_exit("Internal server error"), 500
        values = [row.Value for row in rows]

    return BARUtils.success_exit(values)
def get(self, fixed_pdb='', moving_pdb=''):
    """This end point returns the superimposition of the moving PDB onto the fixed PDB in PDB format.

    Both ids are escaped and must be valid Arabidopsis or poplar gene ids,
    otherwise a BARUtils error payload is returned with status 400. When the
    result file is not already served under the phenix-pdbs link,
    ``phenix.superpose_pdbs`` is run to produce it. The caller is then
    redirected to that link.
    """
    fixed_pdb = escape(fixed_pdb)
    moving_pdb = escape(moving_pdb)

    arabidopsis_pdb_path = '/var/www/html/eplant_legacy/java/Phyre2-Models/Phyre2_'
    poplar_pdb_path = '/var/www/html/eplant_poplar/pdb/'
    phenix_pdb_link = 'https://bar.utoronto.ca/phenix-pdbs/'
    phenix_pdb_path = '/var/www/html/phenix-pdbs/'

    def model_path_for(pdb_gene):
        # Map a gene id to its model file; None when it matches neither species
        if BARUtils.is_arabidopsis_gene_valid(pdb_gene):
            return arabidopsis_pdb_path + pdb_gene.upper() + '.pdb'
        if BARUtils.is_poplar_gene_valid(pdb_gene):
            return poplar_pdb_path + BARUtils.format_poplar(pdb_gene) + '.pdb'
        return None

    # Check if genes ids are valid
    fixed_pdb_path = model_path_for(fixed_pdb)
    if fixed_pdb_path is None:
        return BARUtils.error_exit('Invalid fixed pdb gene id'), 400

    moving_pdb_path = model_path_for(moving_pdb)
    if moving_pdb_path is None:
        return BARUtils.error_exit('Invalid moving pdb gene id'), 400

    # Check if model already exists
    phenix_file_name = fixed_pdb.upper() + "-" + moving_pdb.upper() + "-phenix.pdb"
    model_url = phenix_pdb_link + phenix_file_name
    response = requests.get(model_url)

    # If not, generate the model
    if response.status_code != 200:
        command = [
            'phenix.superpose_pdbs',
            'file_name=' + phenix_pdb_path + phenix_file_name,
            fixed_pdb_path,
            moving_pdb_path,
        ]
        subprocess.run(command)

    return redirect(model_url)
def get(self, species="", gene_id=""):
    """This end point provides gene isoforms given a gene ID.

    Only genes/isoforms with pdb structures are returned. Handles the species
    "arabidopsis", "poplar" and "tomato"; returns a BARUtils success payload
    with the isoform list, or a BARUtils error payload (status 400 for an
    invalid gene id, 500 for a database error).
    """
    # Escape input
    species = escape(species)
    gene_id = escape(gene_id)

    # Set the database and check if genes are valid
    if species == "arabidopsis":
        database = eplant2_isoforms()
        gene_ok = BARUtils.is_arabidopsis_gene_valid(gene_id)
    elif species == "poplar":
        database = eplant_poplar_isoforms
        gene_ok = BARUtils.is_poplar_gene_valid(gene_id)
        if gene_ok:
            # Format the gene first
            gene_id = BARUtils.format_poplar(gene_id)
    elif species == "tomato":
        database = eplant_tomato_isoforms
        gene_ok = BARUtils.is_tomato_gene_valid(gene_id, False)
    else:
        return BARUtils.error_exit("No data for the given species")

    if not gene_ok:
        return BARUtils.error_exit("Invalid gene id"), 400

    # Now get the data
    try:
        rows = database.query.filter_by(gene=gene_id).all()
    except OperationalError:
        return BARUtils.error_exit("An internal error has occurred"), 500

    gene_isoforms = [row.isoform for row in rows]

    # Found isoforms
    if gene_isoforms:
        return BARUtils.success_exit(gene_isoforms)
    return BARUtils.error_exit("There are no data found for the given gene")
def get(self, species='', gene_id=''):
    """This end point provides gene isoforms given a gene ID.

    Only genes/isoforms with pdb structures are returned. For 'arabidopsis'
    the isoforms come from the ``isoforms`` model; for 'poplar' the gene id
    itself is returned when a matching ``.pdb`` file exists on disk.
    """
    # Escape input
    species = escape(species)
    gene_id = escape(gene_id)

    if species == 'arabidopsis':
        if not BARUtils.is_arabidopsis_gene_valid(gene_id):
            return BARUtils.error_exit('Invalid gene id'), 400

        try:
            rows = isoforms.query.filter_by(gene=gene_id).all()
        except OperationalError:
            return BARUtils.error_exit('An internal error has occurred'), 500

        gene_isoforms = [row.isoform for row in rows]

        # Found isoforms
        if gene_isoforms:
            return BARUtils.success_exit(gene_isoforms)

    elif species == 'poplar':
        if not BARUtils.is_poplar_gene_valid(gene_id):
            return BARUtils.error_exit('Invalid gene id'), 400

        # Path is the location of poplar pdb file; the directory depends on
        # whether the BAR environment variable is set
        if os.environ.get('BAR'):
            path = '/DATA/ePlants_Data/eplant_poplar/protein_structures/'
        else:
            path = os.getcwd() + '/data/gene_information/gene_isoforms/'
        path += gene_id + '.pdb'

        if os.path.exists(path) and os.path.isfile(path):
            return BARUtils.success_exit(gene_id)

    else:
        return BARUtils.error_exit('No data for the given species')

    # Valid id, but nothing was found for it
    return BARUtils.error_exit('There are no data found for the given gene')
def get(self, gene_id=''):
    """This end point retrieves Gene RIFs from ThaleMine given an AGI ID.

    The id is escaped and validated as an Arabidopsis gene id; an invalid id
    yields the BARUtils error payload with status 400. Otherwise the decoded
    JSON body of the ThaleMine query service response is returned.
    """
    gene_id = escape(gene_id)

    # Is data valid
    if not BARUtils.is_arabidopsis_gene_valid(gene_id):
        return BARUtils.error_exit('Invalid gene id'), 400

    # PathQuery XML; "{}" is the slot for the gene id
    query_template = (
        '<query name="" model="genomic" '
        'view="Gene.geneRifs.annotation Gene.geneRifs.timeStamp Gene.geneRifs.publication.pubMedId" '
        'longDescription="" sortOrder="Gene.geneRifs.annotation asc">'
        '<constraint path="Gene.primaryIdentifier" op="=" value="{}"/></query>'
    )
    query = query_template.format(gene_id)

    # Now query the web service
    thalemine_response = requests.post(
        'https://bar.utoronto.ca/thalemine/service/query/results',
        data={'format': 'json', 'query': query},
        headers=request_headers,
    )
    return thalemine_response.json()
def get(self, gene_id='', top_n=''):
    """This end point is a proxy for ATTED-II api version 4.

    This is used by ThaleMine. This end point is currently not cached.
    Both inputs are escaped; an invalid gene id or a non-integer count yields
    a BARUtils error payload with status 400. Otherwise the decoded JSON body
    of the ATTED-II response is returned.
    """
    gene_id = escape(gene_id)
    top_n = escape(top_n)

    # Is data valid
    gene_ok = BARUtils.is_arabidopsis_gene_valid(gene_id)
    if not gene_ok:
        return BARUtils.error_exit('Invalid gene id'), 400

    count_ok = BARUtils.is_integer(top_n)
    if not count_ok:
        return BARUtils.error_exit('Invalid count'), 400

    # Now query the web service
    atted_response = requests.get(
        'https://atted.jp/cgi-bin/api4.cgi',
        params={'gene': gene_id, 'topN': top_n},
        headers=request_headers,
    )

    # I think the remote API always returns status 200, so skip status checking
    return atted_response.json()
def post(self):
    """This end point returns gene isoforms data for multiple genes of one species.

    Only genes/isoforms with pdb structures are returned. The request JSON is
    loaded through ``GeneIsoformsSchema`` and must provide ``genes`` and
    ``species``. Returns a BARUtils success payload mapping each gene to a
    list of isoforms, or a BARUtils error payload with status 400 or 500.
    """
    payload = request.get_json()

    # Validate json
    try:
        payload = GeneIsoformsSchema().load(payload)
    except ValidationError as err:
        return BARUtils.error_exit(err.messages), 400

    genes = payload['genes']
    species = payload['species']

    # Set species and check gene ID format
    if species == 'arabidopsis':
        # Check if gene is valid
        if not all(BARUtils.is_arabidopsis_gene_valid(gene) for gene in genes):
            return BARUtils.error_exit('Invalid gene id'), 400

        # Query the database
        database = isoforms()
        try:
            rows = database.query.filter(isoforms.gene.in_(genes)).all()
        except OperationalError:
            return BARUtils.error_exit('An internal error has occurred.'), 500

        if not rows:
            return BARUtils.error_exit(
                'No data for the given species/genes'), 400

        # Group isoforms under their gene
        data = {}
        for row in rows:
            data.setdefault(row.gene, []).append(row.isoform)
        return BARUtils.success_exit(data)

    if species == 'poplar':
        # Check if gene is valid
        if not all(BARUtils.is_poplar_gene_valid(gene) for gene in genes):
            return BARUtils.error_exit('Invalid gene id'), 400

        # Path is the location of poplar pdb file
        if os.environ.get('BAR'):
            path = '/DATA/ePlants_Data/eplant_poplar/protein_structures/'
        else:
            path = os.getcwd() + '/data/gene_information/gene_isoforms/'

        # Check if the genes exist.
        data = {}
        for gene in genes:
            gene_path = path + gene + '.pdb'
            if os.path.exists(gene_path) and os.path.isfile(gene_path):
                data[gene] = [gene]

        # Return data if gene is found
        if data:
            return BARUtils.success_exit(data)
        return BARUtils.error_exit('No data for the given species/genes'), 400

    return BARUtils.error_exit('Invalid species'), 400
def get_data(species, database, gene_id, sample_ids=None):
    """This function is used to query the database for gene expression

    Only the species 'arabidopsis' and the database 'single_cell' are
    handled; anything else is reported as an error dict rather than raised.

    :param species: name of species
    :param database: name of BAR database
    :param gene_id: gene id in the data_probeset column
    :param sample_ids: sample ids in the data_bot_id column; None or empty
        means every sample stored for the gene
    :return: dict gene expression data. On success
        ``{'success': True, 'data': {data_bot_id: data_signal, ...}}``;
        on failure ``{'success': False, 'error': <message>, 'error_code': <400|500>}``
    """
    if sample_ids is None:
        sample_ids = []

    # Set species and check gene ID format
    if species != 'arabidopsis':
        return {
            'success': False,
            'error': 'Invalid species',
            'error_code': 400
        }
    if not BARUtils.is_arabidopsis_gene_valid(gene_id):
        return {
            'success': False,
            'error': 'Invalid gene id',
            'error_code': 400
        }

    # Set model
    if database != 'single_cell':
        return {
            'success': False,
            'error': 'Invalid database',
            'error_code': 400
        }
    model = SingleCell()
    # Example: cluster0_WT1.ExprMean
    # The dot is escaped so only a literal "." is accepted before "ExprMean".
    sample_regex = re.compile(r"^\D+\d+_WT\d+\.ExprMean$", re.I)

    # Validate all samples
    for sample_id in sample_ids:
        if not sample_regex.search(sample_id):
            return {
                'success': False,
                'error': 'Invalid sample id',
                'error_code': 400
            }

    # Now query the database
    try:
        if sample_ids:
            # This optimizes query of MySQL in operator.
            rows = model.query.filter(
                SingleCell.data_probeset_id == gene_id,
                SingleCell.data_bot_id.in_(sample_ids)).all()
        else:
            rows = model.query.filter_by(data_probeset_id=gene_id).all()
    except OperationalError:
        return {
            'success': False,
            'error': 'An internal error has occurred',
            'error_code': 500
        }

    data = {row.data_bot_id: row.data_signal for row in rows}
    return {'success': True, 'data': data}
def post(self):
    """This end point returns gene isoforms data for multiple genes of one species.

    Only genes/isoforms with pdb structures are returned. The request JSON is
    loaded through ``GeneIsoformsSchema`` and must provide ``genes`` and
    ``species`` ("arabidopsis", "poplar" or "tomato"). Returns a BARUtils
    success payload mapping each gene to a list of isoforms, or a BARUtils
    error payload with status 400 or 500.
    """
    payload = request.get_json()

    # Validate json
    try:
        payload = GeneIsoformsSchema().load(payload)
    except ValidationError as err:
        return BARUtils.error_exit(err.messages), 400

    genes = payload["genes"]
    species = payload["species"]

    # Set species and check gene ID format; each species has its own model
    # and its own id validator
    if species == "arabidopsis":
        model = eplant2_isoforms
        database = model()
        genes_ok = all(
            BARUtils.is_arabidopsis_gene_valid(gene) for gene in genes)
    elif species == "poplar":
        model = eplant_poplar_isoforms
        database = model()
        genes_ok = all(BARUtils.is_poplar_gene_valid(gene) for gene in genes)
    elif species == "tomato":
        model = eplant_tomato_isoforms
        database = model()
        genes_ok = all(
            BARUtils.is_tomato_gene_valid(gene, False) for gene in genes)
    else:
        return BARUtils.error_exit("Invalid species"), 400

    if not genes_ok:
        return BARUtils.error_exit("Invalid gene id"), 400

    # Query the model selected for this species
    try:
        rows = database.query.filter(model.gene.in_(genes)).all()
    except OperationalError:
        return BARUtils.error_exit("An internal error has occurred."), 500

    if not rows:
        return BARUtils.error_exit("No data for the given species/genes"), 400

    # If there any isoforms found, return data grouped by gene
    data = {}
    for row in rows:
        data.setdefault(row.gene, []).append(row.isoform)
    return BARUtils.success_exit(data)