Esempio n. 1
0
def filter_gene_names_present_in_database(gene_names_of_interest):
    """Return the gene names that are not yet stored in the database.

    Args:
        gene_names_of_interest: iterable of gene names to check. It is copied
            into a set, so duplicates collapse and the caller's object is
            never mutated.

    Returns:
        A list (in arbitrary order) of the distinct names from
        ``gene_names_of_interest`` that were not matched by the query on
        ``Gene.gene_name``.

    Raises:
        Any exception raised while building the set or querying is logged
        with its traceback and re-raised unchanged.
    """
    _session = db.create_scoped_session()
    _log.info(
        "Filtering gene names that are already present in the database ...")

    try:
        # Work on a private set: cheap membership tests and safe removal.
        remaining_gene_names = set(gene_names_of_interest)
        n_gene_names = len(remaining_gene_names)

        # Nothing to look up for an empty input, so skip the empty IN () query.
        if remaining_gene_names:
            present_rows = _session.query(distinct(Gene.gene_name)).filter(
                Gene.gene_name.in_(remaining_gene_names)).all()
            for present_row in present_rows:
                # discard() instead of remove(): a name returned by the
                # database that is not byte-identical to one of the inputs
                # (e.g. case-insensitive matching) must not raise KeyError.
                remaining_gene_names.discard(present_row[0])

        # Count what was actually removed rather than the rows returned.
        n_filtered_gene_names = n_gene_names - len(remaining_gene_names)

        _log.info("Filtered '" + str(n_filtered_gene_names) + "' out of '" +
                  str(n_gene_names) +
                  "' gene names that are already present in the database ...")

        return list(remaining_gene_names)
    except Exception:
        _log.error(traceback.format_exc())
        raise
    finally:
        # Close this session, thus all items are cleared and memory usage is kept at a minimum
        _session.remove()
Esempio n. 2
0
def add_gene_mapping_to_database(gene_mapping):
    """Persist the genes, proteins and mappings of a gene mapping.

    ``gene_mapping`` is indexed with the keys ``"genes"``, ``"proteins"`` and
    ``"mappings"``, each of which is in turn indexed by transcription id.
    For every transcription id under ``"genes"`` the gene translation is
    added to the session. When a protein is registered for the same
    transcription id, the mappings are attached to both the gene translation
    and the protein, and the protein is linked to the gene translation.
    An existing database protein with the same ``uniprot_ac`` is reused if
    one is found; otherwise the new protein is popped from
    ``gene_mapping["proteins"]`` and added.

    The session is committed once per transcription id.

    Side effects:
        Entries are popped from ``gene_mapping["proteins"]`` for proteins
        that were not yet present in the database.

    Raises:
        Any exception is logged with its traceback and re-raised unchanged.
    """
    _session = db.create_scoped_session()

    try:
        for transcription_id in gene_mapping["genes"].keys():
            # no_autoflush: the lookup query below must not flush the
            # partially linked objects of this iteration.
            with _session.no_autoflush:
                # retrieve the gene_translation
                gene_translation = gene_mapping["genes"][transcription_id]

                if transcription_id in gene_mapping["proteins"].keys():
                    # test if this protein already exists in the database
                    matching_protein = _session.query(Protein).filter_by(
                        uniprot_ac=gene_mapping["proteins"]
                        [transcription_id].uniprot_ac).first()
                    protein_already_present = True
                    if matching_protein is None:
                        # Protein is NOT yet present in the database: take the new
                        # protein out of the to-be-added proteins and use it instead
                        matching_protein = gene_mapping["proteins"].pop(
                            transcription_id)
                        protein_already_present = False

                    # add relationships to mapping from gene translation and protein
                    for mapping in gene_mapping["mappings"][transcription_id]:
                        gene_translation.mappings.append(mapping)
                        matching_protein.mappings.append(mapping)

                    # add relationship from protein to gene transcript
                    matching_protein.genes.append(gene_translation)

                    # add the gene translation to the database
                    _session.add(gene_translation)

                    # add the protein to the database (only when it is new)
                    if not protein_already_present:
                        _session.add(matching_protein)

                    # add all other objects to the database
                    _session.add_all(
                        gene_mapping["mappings"][transcription_id])
                else:
                    # no protein for this transcription id: only add the gene translation
                    _session.add(gene_translation)
            # Commit the changes of this mapping
            _session.commit()
    except:
        _log.error(traceback.format_exc())
        raise
    finally:
        # Close this session, thus all items are cleared and memory usage is kept at a minimum
        _session.remove()
Esempio n. 3
0
    def retrieve_all_transcript_ids_with_mappings():
        """Return the transcription ids of all genes that are linked to a protein.

        Selects ``Gene.gencode_transcription_id`` for every gene whose
        ``protein_id`` is set and returns the query results as a list.
        NOTE(review): because a single column is queried, each element is
        presumably a one-column result row rather than a bare value.

        Raises:
            RecoverableError: when the database connection/resource failed.
            Any other exception is logged with its traceback and re-raised.
        """
        session = db.create_scoped_session()

        try:
            transcript_query = session.query(Gene.gencode_transcription_id)
            # "!= None" is intentional: the ORM column overloads the operator
            # to build the SQL condition, so "is not None" would not work here.
            transcript_query = transcript_query.filter(Gene.protein_id != None)
            return list(transcript_query.all())
        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
            raise RecoverableError(str(error))
        except:
            _log.error(traceback.format_exc())
            raise
        finally:
            # Always release the scoped session so its objects are cleared
            session.remove()
Esempio n. 4
0
    def get_mappings_for_gene(_gene):
        """Return all Mapping entities whose ``gene_id`` equals ``_gene.id``.

        Args:
            _gene: object exposing an ``id`` attribute (a Gene, per the name).

        Returns:
            A list of the matching ``Mapping`` query results.

        Raises:
            RecoverableError: when the database connection/resource failed.
            Any other exception is logged with its traceback and re-raised.
        """
        session = db.create_scoped_session()

        try:
            mapping_query = session.query(Mapping).filter(Mapping.gene_id == _gene.id)
            return list(mapping_query.all())
        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
            raise RecoverableError(str(error))
        except:
            _log.error(traceback.format_exc())
            raise
        finally:
            # Always release the scoped session so its objects are cleared
            session.remove()
Esempio n. 5
0
    def get_domains_for_protein(protein_id):
        """Return all Interpro entries stored for the given ``protein_id``.

        Args:
            protein_id: value compared against ``Interpro.protein_id``.

        Returns:
            A list of the matching ``Interpro`` query results.

        Raises:
            RecoverableError: when the database connection/resource failed.
            Any other exception is logged with its traceback and re-raised.
        """
        session = db.create_scoped_session()

        try:
            domain_query = session.query(Interpro).filter(Interpro.protein_id == protein_id)
            return list(domain_query.all())
        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
            raise RecoverableError(str(error))
        except:
            _log.error(traceback.format_exc())
            raise
        finally:
            # Always release the scoped session so its objects are cleared
            session.remove()
Esempio n. 6
0
    def retrieve_all_transcript_ids(gene_name):
        """Return all Gene entities whose gene name matches ``gene_name``.

        The comparison is case-insensitive: both the stored name and the
        argument are lower-cased. Despite the function name, the query selects
        whole ``Gene`` entities, not just their transcript ids.

        Args:
            gene_name: gene name to look up; must support ``.lower()``.

        Raises:
            RecoverableError: when the database connection/resource failed.
            Any other exception is logged with its traceback and re-raised.
        """
        session = db.create_scoped_session()

        try:
            wanted_name = gene_name.lower()
            gene_query = session.query(Gene).filter(func.lower(Gene.gene_name) == wanted_name)
            return list(gene_query.all())
        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
            raise RecoverableError(str(error))
        except:
            _log.error(traceback.format_exc())
            raise
        finally:
            # Always release the scoped session so its objects are cleared
            session.remove()
Esempio n. 7
0
    def retrieve_all_gene_names_from_db():
        """Return the distinct gene names present in the database.

        Selects ``Gene.gene_name`` with a DISTINCT on that same column and
        returns the query results as a list.
        NOTE(review): because a single column is queried, each element is
        presumably a one-column result row rather than a bare string.

        Raises:
            RecoverableError: when the database connection/resource failed.
            Any other exception is logged with its traceback and re-raised.
        """
        session = db.create_scoped_session()

        try:
            name_query = session.query(Gene.gene_name).distinct(Gene.gene_name)
            return list(name_query.all())
        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
            raise RecoverableError(str(error))
        except:
            _log.error(traceback.format_exc())
            raise
        finally:
            # Always release the scoped session so its objects are cleared
            session.remove()
Esempio n. 8
0
    def retrieve_transcript_id_for_multiple_protein_ids(_protein_ids):
        """Map gene ids to gencode transcription ids for the given protein ids.

        Args:
            _protein_ids: collection of protein ids matched against
                ``Gene.protein_id`` with an IN clause.

        Returns:
            A dict ``{gene.id: gene.gencode_transcription_id}`` covering every
            gene returned by the query.

        Raises:
            RecoverableError: when the database connection/resource failed.
            Any other exception is logged with its traceback and re-raised.
        """
        session = db.create_scoped_session()

        try:
            matching_genes = session.query(Gene).filter(Gene.protein_id.in_(_protein_ids)).all()
            return {
                gene.id: gene.gencode_transcription_id
                for gene in matching_genes
            }
        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
            raise RecoverableError(str(error))
        except:
            _log.error(traceback.format_exc())
            raise
        finally:
            # Always release the scoped session so its objects are cleared
            session.remove()
Esempio n. 9
0
    def retrieve_protein_id_for_multiple_protein_acs(_protein_acs):
        """Map uniprot accession codes to protein ids.

        Args:
            _protein_acs: collection of accession codes matched against
                ``Protein.uniprot_ac`` with an IN clause.

        Returns:
            A dict ``{protein.uniprot_ac: protein.id}`` covering every protein
            returned by the query.

        Raises:
            RecoverableError: when the database connection/resource failed.
            Any other exception is logged with its traceback and re-raised.
        """
        session = db.create_scoped_session()

        try:
            matching_proteins = session.query(Protein).filter(Protein.uniprot_ac.in_(_protein_acs)).all()
            return {
                protein.uniprot_ac: protein.id
                for protein in matching_proteins
            }
        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
            raise RecoverableError(str(error))
        except:
            _log.error(traceback.format_exc())
            raise
        finally:
            # Always release the scoped session so its objects are cleared
            session.remove()
Esempio n. 10
0
    def get_mappings_for_multiple_protein_ids(_protein_ids):
        """Group the mappings of several proteins by protein id.

        Args:
            _protein_ids: collection of protein ids matched against
                ``Mapping.protein_id`` with an IN clause.

        Returns:
            A dict ``{protein_id: [Mapping, ...]}``. Protein ids without any
            mapping do not appear as keys.

        Raises:
            RecoverableError: when the database connection/resource failed.
            Any other exception is logged with its traceback and re-raised.
        """
        session = db.create_scoped_session()

        try:
            grouped_mappings = {}
            found_mappings = session.query(Mapping).filter(Mapping.protein_id.in_(_protein_ids)).all()
            for found_mapping in found_mappings:
                # Create the bucket on first sight of a protein id, then append
                grouped_mappings.setdefault(found_mapping.protein_id, []).append(found_mapping)
            return grouped_mappings
        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
            raise RecoverableError(str(error))
        except:
            _log.error(traceback.format_exc())
            raise
        finally:
            # Always release the scoped session so its objects are cleared
            session.remove()
Esempio n. 11
0
    def retrieve_protein(protein_id):
        """Return the single Protein whose ``id`` equals ``protein_id``.

        Args:
            protein_id: value compared against ``Protein.id``.

        Returns:
            The matching ``Protein``, or ``None`` when no row or more than one
            row matched (both cases are logged as errors).

        Raises:
            RecoverableError: when the database connection/resource failed.
            Any other exception is logged with its traceback and re-raised.
        """
        session = db.create_scoped_session()

        try:
            protein_query = session.query(Protein).filter(Protein.id == protein_id)
            return protein_query.one()
        except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
            raise RecoverableError(str(error))
        except MultipleResultsFound as error:
            message = "ProteinRepository.retrieve_protein(protein_id): Multiple results found while expecting uniqueness for protein_id '"+str(protein_id)+"'. "+str(error)
            _log.error(message)
            return None
        except NoResultFound as error:
            message = "ProteinRepository.retrieve_protein(protein_id): Expected results but found none for protein_id '"+str(protein_id)+"'. "+str(error)
            _log.error(message)
            return None
        except:
            _log.error(traceback.format_exc())
            raise
        finally:
            # Always release the scoped session so its objects are cleared
            session.remove()
Esempio n. 12
0
 def retrieve_gene(transcription_id):
     """Return the single Gene with the given gencode transcription id.

     Args:
         transcription_id: value compared against
             ``Gene.gencode_transcription_id``.

     Returns:
         The one matching ``Gene``.

     Raises:
         RecoverableError: when the database connection/resource failed.
         RepositoryException: when no row or more than one row matched, or
             when any other ``Exception`` occurred; the message is logged
             as an error before raising.
     """
     def _logged_failure(message):
         # Log the message and hand back the exception for the caller to raise
         _log.error(message)
         return RepositoryException(message)

     session = db.create_scoped_session()
     try:
         gene_query = session.query(Gene).filter(Gene.gencode_transcription_id == transcription_id)
         return gene_query.one()
     except (AlchemyResourceClosedError, AlchemyOperationalError, PsycopOperationalError) as error:
         raise RecoverableError(str(error))
     except MultipleResultsFound as error:
         raise _logged_failure("GeneRepository.retrieve_gene(transcription_id): Multiple results found while expecting uniqueness for transcription_id '"+str(transcription_id)+"'. "+str(error))
     except NoResultFound as error:
         raise _logged_failure("GeneRepository.retrieve_gene(transcription_id): Expected results but found none for transcription_id '"+str(transcription_id)+"'. "+str(error))
     except Exception as error:
         raise _logged_failure("GeneRepository.retrieve_gene(transcription_id): Unexpected exception for transcription_id '"+str(transcription_id)+"'. "+str(error))
     finally:
         # Always release the scoped session so its objects are cleared
         session.remove()