Beispiel #1
0
    def test_is_arabidopsis_gene_valid(self):
        """Check the Arabidopsis gene id validator against accepted and rejected ids."""
        # Identifiers the validator is expected to accept
        for gene in ('At1g01010', 'At1g01010.1'):
            self.assertTrue(BARUtils.is_arabidopsis_gene_valid(gene))

        # Identifiers the validator is expected to reject
        for gene in ('abc', 'At1g01010.11'):
            self.assertFalse(BARUtils.is_arabidopsis_gene_valid(gene))
Beispiel #2
0
    def test_is_arabidopsis_gene_valid(self):
        """Check the Arabidopsis gene id validator against a table of expectations."""
        # Maps each candidate id to whether the validator should accept it;
        # dict order keeps the checks in the same sequence as before.
        expectations = {
            "At1g01010": True,
            "At1g01010.1": True,
            "abc": False,
            "At1g01010.11": False,
        }

        for gene, should_be_valid in expectations.items():
            outcome = BARUtils.is_arabidopsis_gene_valid(gene)
            if should_be_valid:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)
Beispiel #3
0
    def get(self, gene_id=""):
        """This end point retrieves publications from ThaleMine given an AGI ID

        :param gene_id: Arabidopsis gene identifier (AGI ID) to look up
        :return: the JSON-decoded body of the ThaleMine response, or an error
            payload with HTTP status 400 when the gene id is not valid

        Connection failures and timeouts raised by ``requests`` are not caught
        here and propagate to the caller.
        """
        gene_id = escape(gene_id)

        # Is data valid
        if not BARUtils.is_arabidopsis_gene_valid(gene_id):
            return BARUtils.error_exit("Invalid gene id"), 400

        # XML query template; the gene id is substituted into the constraint value
        query = (
            '<query name="" model="genomic" view="Gene.publications.firstAuthor Gene.publications.issue '
            "Gene.publications.journal Gene.publications.pages Gene.publications.pubMedId Gene.publications.title "
            'Gene.publications.volume Gene.publications.year" longDescription="" '
            'sortOrder="Gene.publications.firstAuthor asc"><constraint path="Gene.primaryIdentifier" op="=" value="{'
            '}"/></query> ')
        query = query.format(gene_id)

        # Now query the web service.
        # The timeout keeps an unresponsive upstream from blocking this worker
        # indefinitely; 30 seconds is a judgment call and may need tuning.
        payload = {"format": "json", "query": query}
        resp = requests.post(
            "https://bar.utoronto.ca/thalemine/service/query/results",
            data=payload,
            headers=request_headers,
            timeout=30,
        )

        return resp.json()
Beispiel #4
0
    def get(self, species='', gene_id=''):
        """Return the aliases recorded for a gene of the given species.

        Only 'arabidopsis' is handled; any other species yields an error
        payload. An invalid gene id yields an error payload with status 400,
        and a database OperationalError yields one with status 500.
        """
        # Escape input
        species = escape(species)
        gene_id = escape(gene_id)

        # Reject unsupported species and malformed ids up front
        if species != 'arabidopsis':
            return BARUtils.error_exit('No data for the given species')

        if not BARUtils.is_arabidopsis_gene_valid(gene_id):
            return BARUtils.error_exit('Invalid gene id'), 400

        try:
            records = AgiAlias.query.filter_by(agi=gene_id).all()
        except OperationalError:
            return BARUtils.error_exit('An internal error has occurred'), 500

        aliases = [record.alias for record in records]

        # Return results if there are data
        if not aliases:
            return BARUtils.error_exit(
                'There are no data found for the given gene')
        return BARUtils.success_exit(aliases)
 def get(self, table_id, sample, gene):
     """Returns the value for a given gene and sample. If no sample is given returns all values for that gene

     :param table_id: identifier handed to SummarizationGeneExpressionUtils.get_table_object
     :param sample: sample name; an empty string selects every sample of the gene
     :param gene: Arabidopsis gene identifier
     :return: success payload holding a {sample: value} dict when no sample is
         given, or a list of values when a sample is given; otherwise an error
         payload (status 400 for an invalid gene id, 500 for a database error)
     """
     if not BARUtils.is_arabidopsis_gene_valid(gene):
         # A validation failure is an error, so report it through error_exit
         # like every other failure path in this method.
         return BARUtils.error_exit("Invalid gene ID"), 400

     # The API key comes from the request header and is checked before any query runs
     key = request.headers.get("X-Api-Key")
     if not SummarizationGeneExpressionUtils.decrement_uses(key):
         return BARUtils.error_exit("Invalid API key")

     con = db.get_engine(bind="summarization")
     tbl = SummarizationGeneExpressionUtils.get_table_object(table_id)

     # NOTE(review): how the positional argument of tbl.select() is interpreted
     # depends on the SQLAlchemy version (legacy releases treat it as a WHERE
     # clause) - confirm the generated SQL is the intended one.
     if sample == "":
         try:
             rows = con.execute(
                 tbl.select(tbl.c.Value).where(tbl.c.Gene == gene))
         except SQLAlchemyError:
             return BARUtils.error_exit("Internal server error"), 500
         # One entry per sample
         values = {str(row.Sample): float(row.Value) for row in rows}
     else:
         try:
             rows = con.execute(
                 tbl.select(tbl.c.Value).where(
                     tbl.c.Sample == sample).where(
                         tbl.c.Gene == gene))
         except SQLAlchemyError:
             return BARUtils.error_exit("Internal server error"), 500
         values = [row.Value for row in rows]

     return BARUtils.success_exit(values)
Beispiel #6
0
    def get(self, fixed_pdb='', moving_pdb=''):
        """This end point returns the superimposition of the moving PDB onto the fixed PDB in PDB format

        :param fixed_pdb: Arabidopsis or poplar gene id of the fixed structure
        :param moving_pdb: Arabidopsis or poplar gene id of the moving structure
        :return: redirect to the superimposed PDB file; an error payload with
            status 400 for an invalid gene id, or 500 when phenix.superpose_pdbs
            exits with a non-zero status
        """

        fixed_pdb = escape(fixed_pdb)
        moving_pdb = escape(moving_pdb)

        arabidopsis_pdb_path = '/var/www/html/eplant_legacy/java/Phyre2-Models/Phyre2_'
        poplar_pdb_path = '/var/www/html/eplant_poplar/pdb/'
        phenix_pdb_link = 'https://bar.utoronto.ca/phenix-pdbs/'
        phenix_pdb_path = '/var/www/html/phenix-pdbs/'

        # Check if genes ids are valid
        if BARUtils.is_arabidopsis_gene_valid(fixed_pdb):
            fixed_pdb_path = arabidopsis_pdb_path + fixed_pdb.upper() + '.pdb'
        elif BARUtils.is_poplar_gene_valid(fixed_pdb):
            fixed_pdb_path = poplar_pdb_path + BARUtils.format_poplar(
                fixed_pdb) + '.pdb'
        else:
            return BARUtils.error_exit('Invalid fixed pdb gene id'), 400

        if BARUtils.is_arabidopsis_gene_valid(moving_pdb):
            moving_pdb_path = arabidopsis_pdb_path + moving_pdb.upper(
            ) + '.pdb'
        elif BARUtils.is_poplar_gene_valid(moving_pdb):
            moving_pdb_path = poplar_pdb_path + BARUtils.format_poplar(
                moving_pdb) + '.pdb'
        else:
            return BARUtils.error_exit('Invalid moving pdb gene id'), 400

        # Check if model already exists.
        # The timeout keeps an unresponsive file server from blocking this
        # worker indefinitely; 30 seconds is a judgment call.
        phenix_file_name = fixed_pdb.upper() + "-" + moving_pdb.upper(
        ) + "-phenix.pdb"
        response = requests.get(phenix_pdb_link + phenix_file_name, timeout=30)

        # If not, generate the model
        if response.status_code != 200:
            completed = subprocess.run([
                'phenix.superpose_pdbs',
                'file_name=' + phenix_pdb_path + phenix_file_name,
                fixed_pdb_path, moving_pdb_path
            ])

            # Do not redirect to a file that a failed run did not produce
            if completed.returncode != 0:
                return BARUtils.error_exit(
                    'An internal error has occurred'), 500

        return redirect(phenix_pdb_link + phenix_file_name)
Beispiel #7
0
    def get(self, species="", gene_id=""):
        """Return the isoforms stored for one gene of the given species.

        Supported species are arabidopsis, poplar and tomato, each backed by
        its own isoform model. Only genes/isoforms with pdb structures are
        returned. An invalid gene id yields status 400 and a database
        OperationalError yields status 500.
        """
        # Escape input
        species = escape(species)
        gene_id = escape(gene_id)

        # Pick the model for the species and validate the gene id with the
        # matching validator
        if species == "arabidopsis":
            model = eplant2_isoforms()
            gene_ok = BARUtils.is_arabidopsis_gene_valid(gene_id)
        elif species == "poplar":
            model = eplant_poplar_isoforms
            gene_ok = BARUtils.is_poplar_gene_valid(gene_id)
        elif species == "tomato":
            model = eplant_tomato_isoforms
            gene_ok = BARUtils.is_tomato_gene_valid(gene_id, False)
        else:
            return BARUtils.error_exit("No data for the given species")

        if not gene_ok:
            return BARUtils.error_exit("Invalid gene id"), 400

        if species == "poplar":
            # Poplar ids are formatted before the lookup
            gene_id = BARUtils.format_poplar(gene_id)

        # Now get the data
        try:
            records = model.query.filter_by(gene=gene_id).all()
        except OperationalError:
            return BARUtils.error_exit("An internal error has occurred"), 500

        gene_isoforms = [record.isoform for record in records]

        # Found isoforms
        if gene_isoforms:
            return BARUtils.success_exit(gene_isoforms)
        return BARUtils.error_exit(
            "There are no data found for the given gene")
Beispiel #8
0
    def get(self, species='', gene_id=''):
        """Return the isoforms available for one gene of the given species.

        Arabidopsis isoforms are read from the isoforms model; a poplar gene
        is reported when its pdb file exists on disk. Only genes/isoforms
        with pdb structures are returned.
        """
        # Escape input
        species = escape(species)
        gene_id = escape(gene_id)

        if species == 'arabidopsis':
            if not BARUtils.is_arabidopsis_gene_valid(gene_id):
                return BARUtils.error_exit('Invalid gene id'), 400

            try:
                records = isoforms.query.filter_by(gene=gene_id).all()
            except OperationalError:
                return BARUtils.error_exit(
                    'An internal error has occurred'), 500

            # Found isoforms
            gene_isoforms = [record.isoform for record in records]
            if gene_isoforms:
                return BARUtils.success_exit(gene_isoforms)

        elif species == 'poplar':
            if not BARUtils.is_poplar_gene_valid(gene_id):
                return BARUtils.error_exit('Invalid gene id'), 400

            # Directory holding the poplar pdb files; the BAR environment
            # variable switches between the two locations
            if os.environ.get('BAR'):
                pdb_dir = '/DATA/ePlants_Data/eplant_poplar/protein_structures/'
            else:
                pdb_dir = os.getcwd() + '/data/gene_information/gene_isoforms/'

            pdb_file = pdb_dir + gene_id + '.pdb'
            if os.path.exists(pdb_file) and os.path.isfile(pdb_file):
                return BARUtils.success_exit(gene_id)

        else:
            return BARUtils.error_exit('No data for the given species')

        # Valid request, but nothing was found for the gene
        return BARUtils.error_exit(
            'There are no data found for the given gene')
Beispiel #9
0
    def get(self, gene_id=''):
        """This end point retrieves Gene RIFs from ThaleMine given an AGI ID

        :param gene_id: Arabidopsis gene identifier (AGI ID) to look up
        :return: the JSON-decoded body of the ThaleMine response, or an error
            payload with HTTP status 400 when the gene id is not valid

        Connection failures and timeouts raised by ``requests`` are not caught
        here and propagate to the caller.
        """
        gene_id = escape(gene_id)

        # Is data valid
        if not BARUtils.is_arabidopsis_gene_valid(gene_id):
            return BARUtils.error_exit('Invalid gene id'), 400

        # XML query template; the gene id is substituted into the constraint value
        query = '<query name="" model="genomic" view="Gene.geneRifs.annotation Gene.geneRifs.timeStamp ' \
                'Gene.geneRifs.publication.pubMedId" longDescription="" sortOrder="Gene.geneRifs.annotation ' \
                'asc"><constraint path="Gene.primaryIdentifier" op="=" value="{}"/></query>'
        query = query.format(gene_id)

        # Now query the web service.
        # The timeout keeps an unresponsive upstream from blocking this worker
        # indefinitely; 30 seconds is a judgment call and may need tuning.
        payload = {'format': 'json', 'query': query}
        resp = requests.post(
            'https://bar.utoronto.ca/thalemine/service/query/results',
            data=payload,
            headers=request_headers,
            timeout=30)

        return resp.json()
Beispiel #10
0
    def get(self, gene_id='', top_n=''):
        """This end point is a proxy for ATTED-II api version 4.
        This is used by ThaleMine.
        This end point is currently not cached.

        :param gene_id: Arabidopsis gene identifier forwarded as the ``gene`` parameter
        :param top_n: integer count forwarded as the ``topN`` parameter
        :return: the JSON-decoded body of the ATTED-II response, or an error
            payload with HTTP status 400 when either input is not valid

        Connection failures and timeouts raised by ``requests`` are not caught
        here and propagate to the caller.
        """
        gene_id = escape(gene_id)
        top_n = escape(top_n)

        # Is data valid
        if not BARUtils.is_arabidopsis_gene_valid(gene_id):
            return BARUtils.error_exit('Invalid gene id'), 400

        if not BARUtils.is_integer(top_n):
            return BARUtils.error_exit('Invalid count'), 400

        # Now query the web service.
        # The timeout keeps an unresponsive remote API from blocking this
        # worker indefinitely; 30 seconds is a judgment call and may need tuning.
        payload = {'gene': gene_id, 'topN': top_n}
        resp = requests.get('https://atted.jp/cgi-bin/api4.cgi',
                            params=payload,
                            headers=request_headers,
                            timeout=30)

        # I think the remote API always returns status 200, so skip status checking
        return resp.json()
Beispiel #11
0
    def post(self):
        """Return isoforms for several genes of one species.

        The JSON request body is validated with GeneIsoformsSchema, then its
        'genes' and 'species' entries are read. Arabidopsis isoforms come from
        the isoforms model; a poplar gene is reported when its pdb file exists
        on disk. Only genes/isoforms with pdb structures are returned.
        """
        body = request.get_json()
        data = {}

        # Validate json
        try:
            body = GeneIsoformsSchema().load(body)
        except ValidationError as err:
            return BARUtils.error_exit(err.messages), 400

        genes = body['genes']
        species = body['species']

        if species == 'arabidopsis':
            # Every gene id must be valid before anything is queried
            if not all(BARUtils.is_arabidopsis_gene_valid(gene) for gene in genes):
                return BARUtils.error_exit('Invalid gene id'), 400

            # Query the database
            database = isoforms()
            try:
                rows = database.query.filter(isoforms.gene.in_(genes)).all()
            except OperationalError:
                return BARUtils.error_exit(
                    'An internal error has occurred.'), 500

            if not rows:
                return BARUtils.error_exit(
                    'No data for the given species/genes'), 400

            # Group the isoforms by gene
            for row in rows:
                data.setdefault(row.gene, []).append(row.isoform)
            return BARUtils.success_exit(data)

        if species == 'poplar':
            # Every gene id must be valid before the file system is checked
            if not all(BARUtils.is_poplar_gene_valid(gene) for gene in genes):
                return BARUtils.error_exit('Invalid gene id'), 400

            # Directory holding the poplar pdb files; the BAR environment
            # variable switches between the two locations
            if os.environ.get('BAR'):
                pdb_dir = '/DATA/ePlants_Data/eplant_poplar/protein_structures/'
            else:
                pdb_dir = os.getcwd() + '/data/gene_information/gene_isoforms/'

            # A gene is reported only when its pdb file exists
            for gene in genes:
                pdb_file = pdb_dir + gene + '.pdb'
                if os.path.exists(pdb_file) and os.path.isfile(pdb_file):
                    data[gene] = [gene]

            if data:
                return BARUtils.success_exit(data)
            return BARUtils.error_exit(
                'No data for the given species/genes'), 400

        return BARUtils.error_exit('Invalid species'), 400
    def get_data(species, database, gene_id, sample_ids=None):
        """This function is used to query the database for gene expression
        :param species: name of species (only 'arabidopsis' is accepted)
        :param database: name of BAR database (only 'single_cell' is accepted)
        :param gene_id: gene id in the data_probeset column
        :param sample_ids: sample ids in the data_bot_id column; None or an
            empty list selects every sample of the gene
        :return: dict gene expression data. On success
            {'success': True, 'data': {data_bot_id: data_signal}}; on failure
            {'success': False, 'error': message, 'error_code': 400 or 500}
        """
        if sample_ids is None:
            sample_ids = []

        # Set species and check gene ID format
        if species != 'arabidopsis':
            return {
                'success': False,
                'error': 'Invalid species',
                'error_code': 400
            }
        if not BARUtils.is_arabidopsis_gene_valid(gene_id):
            return {
                'success': False,
                'error': 'Invalid gene id',
                'error_code': 400
            }

        # Set model
        if database != 'single_cell':
            return {
                'success': False,
                'error': 'Invalid database',
                'error_code': 400
            }
        database = SingleCell()
        # Example: cluster0_WT1.ExprMean
        # The dot is escaped so that only a literal '.' is accepted before
        # 'ExprMean', as in the example above.
        sample_regex = re.compile(r"^\D+\d+_WT\d+\.ExprMean$", re.I)

        # Validate all samples (nothing to check when none were given)
        for sample_id in sample_ids:
            if not sample_regex.search(sample_id):
                return {
                    'success': False,
                    'error': 'Invalid sample id',
                    'error_code': 400
                }

        # Now query the database
        try:
            if sample_ids:
                # This optimizes query of MySQL in operator.
                rows = database.query.filter(
                    SingleCell.data_probeset_id == gene_id,
                    SingleCell.data_bot_id.in_(sample_ids)).all()
            else:
                rows = database.query.filter_by(data_probeset_id=gene_id).all()
        except OperationalError:
            return {
                'success': False,
                'error': 'An internal error has occurred',
                'error_code': 500
            }

        data = {row.data_bot_id: row.data_signal for row in rows}

        return {'success': True, 'data': data}
Beispiel #13
0
    def post(self):
        """Return isoforms for several genes of one species.

        The JSON request body is validated with GeneIsoformsSchema, then its
        'genes' and 'species' entries are read. Arabidopsis, poplar and tomato
        each have their own isoform model and gene id validator. Only
        genes/isoforms with pdb structures are returned.
        """
        body = request.get_json()
        data = {}

        # Validate json
        try:
            body = GeneIsoformsSchema().load(body)
        except ValidationError as err:
            return BARUtils.error_exit(err.messages), 400

        genes = body["genes"]
        species = body["species"]

        # Pick the model and the gene id validator for the species
        if species == "arabidopsis":
            model = eplant2_isoforms
            gene_is_valid = BARUtils.is_arabidopsis_gene_valid
        elif species == "poplar":
            model = eplant_poplar_isoforms
            gene_is_valid = BARUtils.is_poplar_gene_valid
        elif species == "tomato":
            model = eplant_tomato_isoforms

            def gene_is_valid(gene):
                # The tomato validator takes an extra flag, always False here
                return BARUtils.is_tomato_gene_valid(gene, False)
        else:
            return BARUtils.error_exit("Invalid species"), 400

        database = model()

        # Check if every gene is valid
        for gene in genes:
            if not gene_is_valid(gene):
                return BARUtils.error_exit("Invalid gene id"), 400

        # Query the model of the selected species
        try:
            rows = database.query.filter(model.gene.in_(genes)).all()
        except OperationalError:
            return BARUtils.error_exit(
                "An internal error has occurred."), 500

        if not rows:
            return BARUtils.error_exit(
                "No data for the given species/genes"), 400

        # If there any isoforms found, return data grouped by gene
        for row in rows:
            data.setdefault(row.gene, []).append(row.isoform)

        return BARUtils.success_exit(data)