Esempio n. 1
0
    def test_is_poplar_gene_valid(self):
        # A well-formed poplar identifier must be accepted by the validator
        self.assertTrue(BARUtils.is_poplar_gene_valid('Potri.019G123900.1'))

        # An arbitrary string that is not a poplar identifier must be rejected
        self.assertFalse(BARUtils.is_poplar_gene_valid('abc'))
Esempio n. 2
0
    def get(self, fixed_pdb='', moving_pdb=''):
        """This end point returns the superimposition of the moving PDB onto the fixed PDB in PDB format"""

        fixed_pdb = escape(fixed_pdb)
        moving_pdb = escape(moving_pdb)

        arabidopsis_pdb_path = '/var/www/html/eplant_legacy/java/Phyre2-Models/Phyre2_'
        poplar_pdb_path = '/var/www/html/eplant_poplar/pdb/'
        phenix_pdb_link = 'https://bar.utoronto.ca/phenix-pdbs/'
        phenix_pdb_path = '/var/www/html/phenix-pdbs/'

        def pdb_path_for(gene_id):
            # Map a gene id to the PDB file of its species.
            # Returns None when the id is neither a valid arabidopsis nor poplar id.
            if BARUtils.is_arabidopsis_gene_valid(gene_id):
                return arabidopsis_pdb_path + gene_id.upper() + '.pdb'
            if BARUtils.is_poplar_gene_valid(gene_id):
                return poplar_pdb_path + BARUtils.format_poplar(
                    gene_id) + '.pdb'
            return None

        # Check if genes ids are valid (fixed first, then moving)
        fixed_pdb_path = pdb_path_for(fixed_pdb)
        if fixed_pdb_path is None:
            return BARUtils.error_exit('Invalid fixed pdb gene id'), 400

        moving_pdb_path = pdb_path_for(moving_pdb)
        if moving_pdb_path is None:
            return BARUtils.error_exit('Invalid moving pdb gene id'), 400

        # Check if model already exists
        phenix_file_name = fixed_pdb.upper() + "-" + moving_pdb.upper(
        ) + "-phenix.pdb"
        try:
            # Bound the wait: without a timeout a stalled file server would
            # block this request (and its worker) indefinitely.
            response = requests.get(phenix_pdb_link + phenix_file_name,
                                    timeout=30)
        except requests.exceptions.RequestException:
            return BARUtils.error_exit('An internal error has occurred'), 500

        # If not, generate the model
        if response.status_code != 200:
            # NOTE(review): the exit status of phenix is not checked; if the
            # superposition fails the redirect below points at a missing file.
            subprocess.run([
                'phenix.superpose_pdbs',
                'file_name=' + phenix_pdb_path + phenix_file_name,
                fixed_pdb_path, moving_pdb_path
            ])

        return redirect(phenix_pdb_link + phenix_file_name)
Esempio n. 3
0
    def get(self, gene_id=''):
        """ Endpoint returns annotated SNP poplar data in order of (to match A th API format):
            AA pos (zero-indexed), sample id, 'missense_variant','MODERATE', 'MISSENSE', codon/DNA base change,
            AA change (DH), pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype
            values with single quotes are fixed """
        results_json = []

        # Escape input
        gene_id = escape(gene_id)

        if not BARUtils.is_poplar_gene_valid(gene_id):
            return BARUtils.error_exit('Invalid gene id'), 400

        try:
            rows = db.session.query(ProteinReference, SnpsToProtein, SnpsReference). \
                select_from(ProteinReference). \
                join(SnpsToProtein). \
                join(SnpsReference). \
                filter(ProteinReference.gene_identifier == gene_id).all()

            # BAR A Th API format is chr, AA pos (zero-indexed), sample id, 'missense_variant',
            # 'MODERATE', 'MISSENSE', codon/DNA base change, AA change (DH),
            # pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype
            for protein, snpsjoin, snpstbl in rows:
                itm_lst = [
                    snpstbl.chromosome,
                    snpsjoin.aa_pos - 1,  # zero index-ed
                    snpstbl.sample_id,
                    'missense_variant',
                    'MODERATE',
                    'MISSENSE',
                    str(snpsjoin.transcript_pos) + snpsjoin.ref_DNA + '>' +
                    snpsjoin.alt_DNA,
                    snpsjoin.ref_aa + snpsjoin.alt_aa,
                    None,  # pro length: not provided by this endpoint
                    # Gene ID is the identifier without its trailing ".<digits>"
                    # suffix. The dot is escaped and all digits are consumed so
                    # an identifier without a suffix is left untouched.
                    re.sub(r"\.\d+$", '', protein.gene_identifier),
                    'protein_coding',
                    'CODING',
                    protein.gene_identifier,  # transcript id
                    None,  # biotype: not provided by this endpoint
                ]
                results_json.append(itm_lst)
        except OperationalError:
            return BARUtils.error_exit('An internal error has occurred'), 500

        # Return results if there are data
        if results_json:
            return BARUtils.success_exit(results_json)
        else:
            return BARUtils.error_exit(
                'There are no data found for the given gene')
Esempio n. 4
0
    def get(self, species="", gene_id=""):
        """This end point provides gene isoforms given a gene ID.
        Only genes/isoforms with pdb structures are returned"""

        # Escape input
        species = escape(species)
        gene_id = escape(gene_id)

        # Set the database and check if genes are valid.
        # The model class itself is used for every species: `.query` is
        # reached through the class, so no model instance needs to be created.
        if species == "arabidopsis":
            database = eplant2_isoforms

            if not BARUtils.is_arabidopsis_gene_valid(gene_id):
                return BARUtils.error_exit("Invalid gene id"), 400

        elif species == "poplar":
            database = eplant_poplar_isoforms

            if not BARUtils.is_poplar_gene_valid(gene_id):
                return BARUtils.error_exit("Invalid gene id"), 400

            # Format the gene first
            gene_id = BARUtils.format_poplar(gene_id)

        elif species == "tomato":
            database = eplant_tomato_isoforms

            if not BARUtils.is_tomato_gene_valid(gene_id, False):
                return BARUtils.error_exit("Invalid gene id"), 400
        else:
            return BARUtils.error_exit("No data for the given species")

        # Now get the data
        try:
            rows = database.query.filter_by(gene=gene_id).all()
        except OperationalError:
            return BARUtils.error_exit("An internal error has occurred"), 500

        gene_isoforms = [row.isoform for row in rows]

        # Found isoforms
        if gene_isoforms:
            return BARUtils.success_exit(gene_isoforms)
        else:
            return BARUtils.error_exit(
                "There are no data found for the given gene")
Esempio n. 5
0
    def get(self, species='', gene_id=''):
        """This end point provides gene isoforms given a gene ID.
        Only genes/isoforms with pdb structures are returned"""

        # Sanitise the user supplied values before using them
        species = escape(species)
        gene_id = escape(gene_id)

        if species == 'arabidopsis':
            if not BARUtils.is_arabidopsis_gene_valid(gene_id):
                return BARUtils.error_exit('Invalid gene id'), 400

            # Arabidopsis isoforms are looked up in the database
            try:
                matches = isoforms.query.filter_by(gene=gene_id).all()
            except OperationalError:
                return BARUtils.error_exit(
                    'An internal error has occurred'), 500

            found = [match.isoform for match in matches]
            if found:
                return BARUtils.success_exit(found)

        elif species == 'poplar':
            if not BARUtils.is_poplar_gene_valid(gene_id):
                return BARUtils.error_exit('Invalid gene id'), 400

            # Poplar isoforms are detected by the presence of a pdb file; the
            # directory depends on whether the BAR environment variable is set
            pdb_dir = (
                '/DATA/ePlants_Data/eplant_poplar/protein_structures/'
                if os.environ.get('BAR')
                else os.getcwd() + '/data/gene_information/gene_isoforms/'
            )
            pdb_file = pdb_dir + (gene_id + '.pdb')

            if os.path.exists(pdb_file) and os.path.isfile(pdb_file):
                return BARUtils.success_exit(gene_id)

        else:
            return BARUtils.error_exit('No data for the given species')

        # A valid gene of a supported species for which nothing was found
        return BARUtils.error_exit(
            'There are no data found for the given gene')
Esempio n. 6
0
    def post(self):
        """This end point returns gene isoforms data for multiple genes of a species.
        Only genes/isoforms with pdb structures are returned"""

        payload = request.get_json()

        # Reject requests whose body does not match the schema
        try:
            payload = GeneIsoformsSchema().load(payload)
        except ValidationError as err:
            return BARUtils.error_exit(err.messages), 400

        genes = payload['genes']
        species = payload['species']

        # Only these two species are supported by this endpoint
        if species not in ('arabidopsis', 'poplar'):
            return BARUtils.error_exit('Invalid species'), 400

        if species == 'arabidopsis':
            # Every gene id must be valid before anything is queried
            if not all(BARUtils.is_arabidopsis_gene_valid(gene) for gene in genes):
                return BARUtils.error_exit('Invalid gene id'), 400

            # Arabidopsis isoforms come from the database
            database = isoforms()
            try:
                rows = database.query.filter(isoforms.gene.in_(genes)).all()
            except OperationalError:
                return BARUtils.error_exit(
                    'An internal error has occurred.'), 500

            if not rows:
                return BARUtils.error_exit(
                    'No data for the given species/genes'), 400

            # Group the isoforms under their gene
            grouped = {}
            for row in rows:
                grouped.setdefault(row.gene, []).append(row.isoform)

            return BARUtils.success_exit(grouped)

        # Poplar: every gene id must be valid before the file system is checked
        if not all(BARUtils.is_poplar_gene_valid(gene) for gene in genes):
            return BARUtils.error_exit('Invalid gene id'), 400

        # Directory holding the poplar pdb files
        if os.environ.get('BAR'):
            pdb_dir = '/DATA/ePlants_Data/eplant_poplar/protein_structures/'
        else:
            pdb_dir = os.getcwd() + '/data/gene_information/gene_isoforms/'

        def has_structure(gene):
            # A gene counts as found when its pdb file exists on disk
            gene_path = pdb_dir + gene + '.pdb'
            return os.path.exists(gene_path) and os.path.isfile(gene_path)

        # Each gene that was found maps to a one-element list holding itself
        available = {gene: [gene] for gene in genes if has_structure(gene)}

        if not available:
            return BARUtils.error_exit(
                'No data for the given species/genes'), 400

        return BARUtils.success_exit(available)
Esempio n. 7
0
    def post(self):
        """This end point returns gene isoforms data for multiple genes of a species.
        Only genes/isoforms with pdb structures are returned"""

        json_data = request.get_json()

        # Validate json
        try:
            json_data = GeneIsoformsSchema().load(json_data)
        except ValidationError as err:
            return BARUtils.error_exit(err.messages), 400

        genes = json_data["genes"]
        species = json_data["species"]

        # Each supported species maps to its isoform model and gene id
        # validator, so validation and querying are written only once.
        # The model classes are used directly; no instance is needed to query.
        supported = {
            "arabidopsis": (eplant2_isoforms,
                            BARUtils.is_arabidopsis_gene_valid),
            "poplar": (eplant_poplar_isoforms,
                       BARUtils.is_poplar_gene_valid),
            "tomato": (eplant_tomato_isoforms,
                       lambda gene: BARUtils.is_tomato_gene_valid(gene, False)),
        }

        # The isinstance guard keeps an unhashable value from raising on lookup
        selected = supported.get(species) if isinstance(species, str) else None
        if selected is None:
            return BARUtils.error_exit("Invalid species"), 400
        model, is_gene_valid = selected

        # Check that every gene is valid before touching the database
        if not all(is_gene_valid(gene) for gene in genes):
            return BARUtils.error_exit("Invalid gene id"), 400

        try:
            rows = model.query.filter(model.gene.in_(genes)).all()
        except OperationalError:
            return BARUtils.error_exit(
                "An internal error has occurred."), 500

        if not rows:
            return BARUtils.error_exit(
                "No data for the given species/genes"), 400

        # If there are any isoforms found, group them by gene and return
        data = {}
        for row in rows:
            data.setdefault(row.gene, []).append(row.isoform)

        return BARUtils.success_exit(data)