Esempio n. 1
0
class GeneLocus(core.PolymerLocus):
    """ Knowledge of a gene locus

    Attributes:
        symbol (:obj:`str`): symbol
        start (:obj:`int`): start coordinate
        end (:obj:`int`): end coordinate
        is_essential (:obj:`bool`): essentiality flag
        proteins (:obj:`ProteinSpeciesType`): protein; one-to-one, the
            reverse side is exposed on the protein as ``gene``
        homologs (:obj:`str`): homologs, stored as a long string
        evidence (:obj:`list` of :obj:`core.Evidence`): evidence; the
            reverse side is exposed on each evidence object as ``genes``
        cog (ontology term): one of the subclasses of ``WC:COG`` in
            ``kbOnt``, or :obj:`None`
    """

    symbol = obj_tables.StringAttribute()
    start = obj_tables.IntegerAttribute()
    end = obj_tables.IntegerAttribute()
    is_essential = obj_tables.BooleanAttribute()
    proteins = obj_tables.OneToOneAttribute(
        ProteinSpeciesType, related_name='gene')
    homologs = obj_tables.LongStringAttribute()
    evidence = obj_tables.OneToManyAttribute(
        core.Evidence, related_name='genes')
    # Only terms below WC:COG are accepted; `none=True` permits a missing value
    cog = obj_tables.sci.onto.OntoTermAttribute(
        kbOnt,
        terms=kbOnt['WC:COG'].subclasses(),
        none=True,
    )

    class Meta(obj_tables.Model.Meta):
        # Column order used when the model is laid out as a table
        attribute_order = (
            'id', 'name', 'synonyms', 'symbol', 'polymer',
            'start', 'end', 'cog', 'homologs', 'is_essential',
            'proteins', 'evidence', 'identifiers', 'references',
            'comments',
        )
        verbose_name = 'Gene'
        verbose_name_plural = 'Genes'
Esempio n. 2
0
class Company(obj_tables.Model):
    """ A company

    Attributes:
        name (:obj:`str`): name; primary, unique key
        url (:obj:`str`): URL
        address (:obj:`Address`): address; one-to-one, the reverse side is
            exposed on the address as ``company``
    """

    name = obj_tables.StringAttribute(
        primary=True,
        unique=True,
        verbose_name='Name',
    )
    url = obj_tables.UrlAttribute(verbose_name='URL')
    address = obj_tables.OneToOneAttribute(
        'Address',
        related_name='company',
        verbose_name='Address',
    )

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Company'
        verbose_name_plural = 'Companies'
        # Unlike the row-formatted models, this one uses the column layout
        table_format = obj_tables.TableFormat.column
        attribute_order = ('name', 'url', 'address')
Esempio n. 3
0
class Gene(obj_tables.Model):
    """ A gene

    Attributes:
        id (:obj:`str`): identifier; primary, unique key
        symbol (:obj:`str`): symbol
        location (:obj:`Location`): location; one-to-one, the reverse side
            is exposed on the location as ``genes``
    """

    id = obj_tables.StringAttribute(
        primary=True, unique=True, verbose_name='Id')
    symbol = obj_tables.StringAttribute(verbose_name='Symbol')
    # NOTE(review): the related name is plural although the relationship is
    # one-to-one -- confirm this is intended before relying on `Location.genes`
    location = obj_tables.OneToOneAttribute(
        'Location', related_name='genes', verbose_name='Location')

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Gene'
        verbose_name_plural = 'Genes'
        table_format = obj_tables.TableFormat.row
        attribute_order = ('id', 'symbol', 'location')
Esempio n. 4
0
class Transcript(obj_tables.Model):
    """ A transcript

    Attributes:
        id (:obj:`str`): identifier; primary, unique key
        gene (:obj:`Gene`): gene; many-to-one, the reverse side is exposed
            on the gene as ``transcripts``
        location (:obj:`Location`): location; one-to-one, the reverse side
            is exposed on the location as ``transcripts``
    """

    id = obj_tables.StringAttribute(
        primary=True, unique=True, verbose_name='Id')
    gene = obj_tables.ManyToOneAttribute(
        'Gene', related_name='transcripts', verbose_name='Gene')
    location = obj_tables.OneToOneAttribute(
        'Location', related_name='transcripts', verbose_name='Location')

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Transcript'
        verbose_name_plural = 'Transcripts'
        table_format = obj_tables.TableFormat.row
        attribute_order = ('id', 'gene', 'location')
Esempio n. 5
0
class Person(obj_tables.Model):
    """ A person

    Attributes:
        name (:obj:`str`): name; primary, unique key
        type (:obj:`str`): one of ``family``, ``friend`` or ``business``
        company (:obj:`Company`): company; many-to-one, the reverse side is
            exposed on the company as ``employees``
        email_address (:obj:`str`): email address
        phone_number (:obj:`str`): phone number
        address (:obj:`Address`): address; one-to-one, the reverse side is
            exposed on the address as ``person``
    """

    name = obj_tables.StringAttribute(
        primary=True,
        unique=True,
        verbose_name='Name',
    )
    type = obj_tables.EnumAttribute(
        ['family', 'friend', 'business'],
        verbose_name='Type',
    )
    company = obj_tables.ManyToOneAttribute(
        'Company',
        related_name='employees',
        verbose_name='Company',
    )
    email_address = obj_tables.EmailAttribute(verbose_name='Email address')
    phone_number = obj_tables.StringAttribute(verbose_name='Phone number')
    address = obj_tables.OneToOneAttribute(
        'Address',
        related_name='person',
        verbose_name='Address',
    )

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Person'
        verbose_name_plural = 'People'
        table_format = obj_tables.TableFormat.row
        attribute_order = (
            'name', 'type', 'company',
            'email_address', 'phone_number', 'address',
        )
Esempio n. 6
0
class ProteinSpeciesType(core.PolymerSpeciesType):
    """ Knowledge of a protein monomer

    Attributes:
        uniprot (:obj:`str`): uniprot id
        transcript (:obj:`TranscriptSpeciesType`): transcript
        coding_regions (:obj:`list` of :obj:`LocusAttribute`): CDS coordinates

    Related attributes:
        transcription_factor_regulation (:obj:`list` of :obj:`TranscriptionFactorRegulation`): transcription factor regulation
        ptm_sites (:obj:`list` of :obj:`PtmSite`): protein modification sites
    """

    uniprot = obj_tables.StringAttribute()
    transcript = obj_tables.OneToOneAttribute(TranscriptSpeciesType,
                                              related_name='protein')
    coding_regions = LocusAttribute(related_name='proteins')

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Protein'
        verbose_name_plural = 'Proteins'
        attribute_order = ('id', 'name', 'uniprot', 'transcript',
                           'coding_regions', 'identifiers', 'references',
                           'comments')

    def _get_coding_dna_seq(self):
        """ Splice the coding regions into a single DNA sequence

        The coding regions are ordered by their start coordinate, cut out of
        the polymer of the transcript's gene, and concatenated. If the gene
        is on the negative strand, the concatenated sequence is reverse
        complemented.

        Returns:
            :obj:`Bio.Seq.Seq`: spliced coding DNA sequence

        Raises:
            :obj:`IndexError`: if the protein has no coding regions
        """
        ordered_cds = sorted(self.coding_regions, key=lambda x: x.start)
        first_start = ordered_cds[0].start

        # Fetch the whole span once; individual regions are sliced out of it
        dna_seq = self.transcript.gene.polymer.get_subseq(
            start=first_start, end=ordered_cds[-1].end)

        # NOTE(review): `Bio.Alphabet` was removed in Biopython 1.78, so this
        # line needs an older Biopython -- confirm the pinned version
        spliced_dna_seq = Bio.Seq.Seq('', alphabet=Bio.Alphabet.DNAAlphabet())
        for region in ordered_cds:
            # Shift to offsets within `dna_seq`; `+ 1` because the region
            # end is inclusive while the slice end is exclusive
            spliced_dna_seq += dna_seq[region.start - first_start:
                                       region.end - first_start + 1]

        if self.transcript.gene.strand == core.PolymerStrand.negative:
            spliced_dna_seq = spliced_dna_seq.reverse_complement()

        return spliced_dna_seq

    def get_seq(self, table=1, cds=True):
        """ Get the amino acid sequence

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                                        (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS

        Returns:
            :obj:`Bio.Seq.Seq`: sequence

        Raises:
            :obj:`IndexError`: if the protein has no coding regions
        """
        coding_dna_seq = self._get_coding_dna_seq()
        return coding_dna_seq.transcribe().translate(table=table, cds=cds)

    def get_seq_and_start_codon(self, table=1, cds=True):
        """ Get the coding RNA sequence, the amino acid sequence and the start codon

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                                        (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS

        Returns:
            :obj:`Bio.Seq.Seq`: coding RNA sequence that will be translated
            :obj:`Bio.Seq.Seq`: amino acid sequence
            :obj:`Bio.Seq.Seq`: start codon

        Raises:
            :obj:`IndexError`: if the protein has no coding regions
        """
        coding_rna_seq = self._get_coding_dna_seq().transcribe()
        protein_seq = coding_rna_seq.translate(table=table, cds=cds)

        # Skip one codon (3 nt) for every leading stop ('*') in the translation;
        # the start codon is the first codon that is not translated to a stop
        start_codon_index = 0
        for residue in protein_seq:
            if residue != '*':
                break
            start_codon_index += 3
        start_codon = coding_rna_seq[start_codon_index:start_codon_index + 3]

        return coding_rna_seq, protein_seq, start_codon

    def get_empirical_formula(self, table=1, cds=True, seq_input=None):
        """ Get the empirical formula

        The formula is the sum of the formulas of the free amino acids minus
        one water per peptide bond. Stop symbols (``*``) are not counted as
        residues. Letters other than the 21 listed below contribute no atoms.

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                                        (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided (and not empty),
                the method will use it instead of calling :obj:`get_seq`

        Returns:
            :obj:`chem.EmpiricalFormula`: empirical formula
        """
        seq = seq_input if seq_input else self.get_seq(table=table, cds=cds)

        elements = ('C', 'H', 'N', 'O', 'S', 'Se')
        # Atoms per free amino acid, in the order of `elements`
        composition = {
            'A': (3, 7, 1, 2, 0, 0),    # Ala: Alanine (C3 H7 N O2)
            'R': (6, 14, 4, 2, 0, 0),   # Arg: Arginine (C6 H14 N4 O2)
            'N': (4, 8, 2, 3, 0, 0),    # Asn: Asparagine (C4 H8 N2 O3)
            'D': (4, 7, 1, 4, 0, 0),    # Asp: Aspartic acid (C4 H7 N O4)
            'C': (3, 7, 1, 2, 1, 0),    # Cys: Cysteine (C3 H7 N O2 S)
            'Q': (5, 10, 2, 3, 0, 0),   # Gln: Glutamine (C5 H10 N2 O3)
            'E': (5, 9, 1, 4, 0, 0),    # Glu: Glutamic acid (C5 H9 N O4)
            'G': (2, 5, 1, 2, 0, 0),    # Gly: Glycine (C2 H5 N O2)
            'H': (6, 9, 3, 2, 0, 0),    # His: Histidine (C6 H9 N3 O2)
            'I': (6, 13, 1, 2, 0, 0),   # Ile: Isoleucine (C6 H13 N O2)
            'L': (6, 13, 1, 2, 0, 0),   # Leu: Leucine (C6 H13 N O2)
            'K': (6, 14, 2, 2, 0, 0),   # Lys: Lysine (C6 H14 N2 O2)
            'M': (5, 11, 1, 2, 1, 0),   # Met: Methionine (C5 H11 N O2 S)
            'F': (9, 11, 1, 2, 0, 0),   # Phe: Phenylalanine (C9 H11 N O2)
            'P': (5, 9, 1, 2, 0, 0),    # Pro: Proline (C5 H9 N O2)
            'S': (3, 7, 1, 3, 0, 0),    # Ser: Serine (C3 H7 N O3)
            'T': (4, 9, 1, 3, 0, 0),    # Thr: Threonine (C4 H9 N O3)
            'W': (11, 12, 2, 2, 0, 0),  # Trp: Tryptophan (C11 H12 N2 O2)
            'Y': (9, 11, 1, 3, 0, 0),   # Tyr: Tyrosine (C9 H11 N O3)
            'V': (5, 11, 1, 2, 0, 0),   # Val: Valine (C5 H11 N O2)
            'U': (3, 7, 1, 2, 0, 1),    # Selcys: Selenocysteine (C3 H7 N O2 Se)
        }

        totals = dict.fromkeys(elements, 0)
        for residue, atoms in composition.items():
            n_residue = seq.count(residue)
            for element, n_atoms in zip(elements, atoms):
                totals[element] += n_atoms * n_residue

        # Each peptide bond releases one water. Clamp at zero so that a
        # sequence without residues (e.g. only '*') yields an empty formula
        # rather than a spurious H2O.
        n_residues = len(seq) - seq.count('*')
        n_peptide_bonds = max(n_residues - 1, 0)
        totals['H'] -= 2 * n_peptide_bonds
        totals['O'] -= n_peptide_bonds

        formula = chem.EmpiricalFormula()
        formula.C = totals['C']
        formula.H = totals['H']
        formula.N = totals['N']
        formula.O = totals['O']
        formula.S = totals['S']
        formula.Se = totals['Se']

        return formula

    def get_charge(self, table=1, cds=True, seq_input=None):
        """ Get the charge at physiological pH

        The charge is the number of R, H and K residues minus the number of
        D and E residues.

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                                        (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided (and not empty),
                the method will use it instead of calling :obj:`get_seq`

        Returns:
            :obj:`int`: charge
        """
        seq = seq_input if seq_input else self.get_seq(table=table, cds=cds)

        # NOTE(review): histidine is counted as +1 although its side chain is
        # mostly deprotonated near pH 7.4 -- confirm this is the intended model
        n_positive = seq.count('R') + seq.count('H') + seq.count('K')
        n_negative = seq.count('D') + seq.count('E')

        return n_positive - n_negative

    def get_mol_wt(self, table=1, cds=True, seq_input=None):
        """ Get the molecular weight

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                                        (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided (and not empty),
                the method will use it instead of calling :obj:`get_seq`

        Returns:
            :obj:`float`: molecular weight
        """
        # `get_empirical_formula` already falls back to `get_seq` when
        # `seq_input` is empty or None, so it can be forwarded unconditionally
        formula = self.get_empirical_formula(
            table=table, cds=cds, seq_input=seq_input)
        return formula.get_molecular_weight()