Ejemplo n.º 1
0
class RegulatoryModule(obj_tables.Model):
    """ Knowledge about a regulatory module

    Attributes:
        id (:obj:`str`): identifier; primary and unique
        name (:obj:`str`): name
        gene (:obj:`GeneLocus`): gene
        promoter (:obj:`str`): promoter ensembl ID
        activity (:obj:`ActivityLevel`): cell-type specific activity level
        type (:obj:`RegulationType`): type of regulation (proximal or distal)
        transcription_factor_regulation (:obj:`TranscriptionFactorRegulation`):
            transcription factor and direction of regulation
        comments (:obj:`str`): comments
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers
    """
    id = obj_tables.SlugAttribute(primary=True, unique=True)
    name = obj_tables.StringAttribute()
    gene = obj_tables.ManyToOneAttribute(
        GeneLocus,
        related_name='regulatory_modules',
    )
    promoter = obj_tables.StringAttribute()
    activity = obj_tables.EnumAttribute(ActivityLevel)
    type = obj_tables.EnumAttribute(RegulationType)
    transcription_factor_regulation = RegDirectionAttribute(
        related_name='regulatory_modules',
    )
    comments = obj_tables.LongStringAttribute()
    references = obj_tables.ManyToManyAttribute(
        core.Reference,
        related_name='regulatory_modules',
    )
    identifiers = core.IdentifierAttribute(
        related_name='regulatory_modules',
    )

    class Meta(obj_tables.Model.Meta):
        # Column order used when the model is laid out as a table.
        attribute_order = (
            'id',
            'name',
            'gene',
            'promoter',
            'activity',
            'type',
            'transcription_factor_regulation',
            'identifiers',
            'references',
            'comments',
        )
Ejemplo n.º 2
0
class PtmSite(core.PolymerLocus):
    """ Knowledge of a protein modification site

    Attributes:
        type (:obj:`str`): type of modification (phosphorylation,
            methylation, etc...)
        modified_protein (:obj:`ProteinSpeciesType`): modified protein
        modified_residue (:obj:`str`): residue name and position in the
            protein sequence
        fractional_abundance (:obj:`float`): ratio of modified protein
            abundance
    """
    type = obj_tables.StringAttribute()
    modified_protein = obj_tables.ManyToOneAttribute(
        'ProteinSpeciesType',
        related_name='ptm_sites',
    )
    modified_residue = obj_tables.StringAttribute()
    fractional_abundance = obj_tables.FloatAttribute()

    class Meta(obj_tables.Model.Meta):
        # 'id', 'name', 'identifiers', 'references' and 'comments' are not
        # declared in this class; presumably inherited from
        # core.PolymerLocus -- confirm against that base class.
        attribute_order = (
            'id',
            'name',
            'modified_protein',
            'type',
            'modified_residue',
            'fractional_abundance',
            'identifiers',
            'references',
            'comments',
        )
Ejemplo n.º 3
0
class Transcript(obj_tables.Model):
    """ Transcript

    Attributes:
        id (:obj:`str`): identifier; primary and unique
        gene (:obj:`Gene`): gene; the reverse relation on the gene is
            named ``transcripts``
        location (:obj:`Location`): location (one-to-one); the reverse
            relation on the location is named ``transcripts``
    """
    id = obj_tables.StringAttribute(
        primary=True, unique=True, verbose_name='Id')
    gene = obj_tables.ManyToOneAttribute(
        'Gene', related_name='transcripts', verbose_name='Gene')
    location = obj_tables.OneToOneAttribute(
        'Location', related_name='transcripts', verbose_name='Location')

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Transcript'
        verbose_name_plural = 'Transcripts'
        table_format = obj_tables.TableFormat.row
        attribute_order = ('id', 'gene', 'location')
Ejemplo n.º 4
0
class Person(obj_tables.Model):
    """ Person

    Attributes:
        name (:obj:`str`): name; primary and unique
        type (:obj:`str`): one of ``family``, ``friend`` or ``business``
        company (:obj:`Company`): company; the reverse relation on the
            company is named ``employees``
        email_address (:obj:`str`): email address
        phone_number (:obj:`str`): phone number
        address (:obj:`Address`): address (one-to-one); the reverse
            relation on the address is named ``person``
    """
    name = obj_tables.StringAttribute(
        primary=True,
        unique=True,
        verbose_name='Name',
    )
    type = obj_tables.EnumAttribute(
        ['family', 'friend', 'business'],
        verbose_name='Type',
    )
    company = obj_tables.ManyToOneAttribute(
        'Company',
        related_name='employees',
        verbose_name='Company',
    )
    email_address = obj_tables.EmailAttribute(
        verbose_name='Email address',
    )
    phone_number = obj_tables.StringAttribute(
        verbose_name='Phone number',
    )
    address = obj_tables.OneToOneAttribute(
        'Address',
        related_name='person',
        verbose_name='Address',
    )

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Person'
        verbose_name_plural = 'People'
        table_format = obj_tables.TableFormat.row
        attribute_order = (
            'name', 'type', 'company',
            'email_address', 'phone_number', 'address',
        )
Ejemplo n.º 5
0
class TranscriptSpeciesType(core.PolymerSpeciesType):
    """ Knowledge of a transcript (spliced RNA) species

    Attributes:
        gene (:obj:`GeneLocus`): gene
        exons (:obj:`list` of :obj:`LocusAttribute`): exon coordinates
        type (:obj:`TranscriptType`): type

    Related attributes:
        protein (:obj:`ProteinSpeciesType`): protein
    """
    gene = obj_tables.ManyToOneAttribute(GeneLocus, related_name='transcripts')
    exons = LocusAttribute(related_name='transcripts')
    type = obj_tables.EnumAttribute(TranscriptType)

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Transcript'
        verbose_name_plural = 'Transcripts'
        attribute_order = ('id', 'name', 'gene', 'exons', 'type',
                           'identifiers', 'references', 'comments')

    def get_seq(self):
        """ Get the 5' to 3' sequence

        The exons are sorted by start coordinate, the region from the first
        exon's start to the last exon's end is fetched from the gene's
        polymer, and the exon slices are concatenated. The result is
        reverse-complemented when the gene is on the negative strand and
        then transcribed.

        Returns:
            :obj:`Bio.Seq.Seq`: sequence

        Raises:
            :obj:`IndexError`: if the transcript has no exons (assuming
                `exons` is list-like)
        """
        ordered_exons = sorted(self.exons, key=lambda exon: exon.start)
        region_start = ordered_exons[0].start

        dna_seq = self.gene.polymer.get_subseq(start=region_start,
                                               end=ordered_exons[-1].end)

        spliced_dna_seq = Bio.Seq.Seq('', alphabet=Bio.Alphabet.DNAAlphabet())
        for exon in ordered_exons:
            # Shift exon coordinates so they index into `dna_seq`; the `+ 1`
            # makes the exon end inclusive in the slice.
            spliced_dna_seq += dna_seq[exon.start - region_start:
                                       exon.end - region_start + 1]

        if self.gene.strand == core.PolymerStrand.negative:
            spliced_dna_seq = spliced_dna_seq.reverse_complement()

        return spliced_dna_seq.transcribe()

    def get_empirical_formula(self, seq_input=None):
        """ Get the empirical formula for a transcript (spliced RNA) species with

        * 5' monophosphate
        * Deprotonated phosphate oxygens

        :math:`N_A * AMP + N_C * CMP + N_G * GMP + N_U * UMP - (L-1) * OH`

        Args:
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided (i.e. not
                :obj:`None`), the method will use it instead of reading from
                fasta file to reduce IO operation

        Returns:
           :obj:`chem.EmpiricalFormula`: empirical formula
        """
        # Compare against None rather than truthiness so that an explicitly
        # supplied empty sequence is honoured instead of silently triggering
        # a call to get_seq().
        seq = seq_input if seq_input is not None else self.get_seq()

        # Upper-case once; counting is case-insensitive.
        upper_seq = seq.upper()
        n_a = upper_seq.count('A')
        n_c = upper_seq.count('C')
        n_g = upper_seq.count('G')
        n_u = upper_seq.count('U')
        length = len(seq)

        formula = chem.EmpiricalFormula()
        formula.C = 10 * n_a + 9 * n_c + 10 * n_g + 9 * n_u
        formula.H = 12 * n_a + 12 * n_c + 12 * n_g + 11 * n_u - (length - 1)
        formula.N = 5 * n_a + 3 * n_c + 5 * n_g + 2 * n_u
        formula.O = 7 * n_a + 8 * n_c + 8 * n_g + 9 * n_u - (length - 1)
        formula.P = n_a + n_c + n_g + n_u

        return formula

    def get_charge(self, seq_input=None):
        """ Get the charge for a transcript (spliced RNA) species with

        * 5' monophosphate
        * Deprotonated phosphate oxygens

        :math:`-L - 1`

        Args:
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided (i.e. not
                :obj:`None`), the method will use it instead of reading from
                fasta file to reduce IO operation

        Returns:
           :obj:`int`: charge
        """
        seq = seq_input if seq_input is not None else self.get_seq()
        return -len(seq) - 1

    def get_mol_wt(self, seq_input=None):
        """ Get the molecular weight for a transcript (spliced RNA) species with

        * 5' monophosphate
        * Deprotonated phosphate oxygens

        Args:
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided (i.e. not
                :obj:`None`), the method will use it instead of reading from
                fasta file to reduce IO operation

        Returns:
            :obj:`float`: molecular weight (Da)
        """
        # get_empirical_formula() already falls back to get_seq() when
        # seq_input is None, so the argument is simply passed through.
        formula = self.get_empirical_formula(seq_input=seq_input)
        return formula.get_molecular_weight()
Ejemplo n.º 6
0
class TranscriptionFactorRegulation(obj_tables.Model):
    """ Pairing of a transcription factor with the direction in which it
    regulates transcription

    Attributes:
        transcription_factor (:obj:`ProteinSpeciesType`): transcription factor
        direction (:obj:`RegulatoryDirection`): regulatory direction

    Related attributes:
        regulatory_modules (:obj:`list` of `RegulatoryModule`): regulatory modules
    """

    transcription_factor = obj_tables.ManyToOneAttribute(
        'ProteinSpeciesType',
        related_name='transcription_factor_regulation',
    )
    direction = obj_tables.EnumAttribute(RegulatoryDirection)

    class Meta(obj_tables.Model.Meta):
        attribute_order = ('transcription_factor', 'direction')
        frozen_columns = 1
        table_format = obj_tables.TableFormat.cell
        ordering = ('transcription_factor', 'direction')

    @staticmethod
    def _serialize(transcription_factor_id, direction_name):
        """ Build the ``<transcription factor id>:<direction name>`` string

        Args:
            transcription_factor_id (:obj:`str`): transcription factor id
            direction_name (:obj:`str`): regulatory direction name

        Returns:
            :obj:`str`: value of primary attribute
        """
        serialized = '{}:{}'.format(transcription_factor_id, direction_name)
        return serialized

    def serialize(self):
        """ Generate the string representation of this instance

        Returns:
            :obj:`str`: value of primary attribute
        """
        tf_id = self.transcription_factor.id
        direction_name = self.direction.name
        return self._serialize(tf_id, direction_name)

    @classmethod
    def deserialize(cls, value, objects):
        """ Deserialize value

        An instance already registered in `objects` under `value` is
        returned as is. Otherwise `value` is parsed as
        ``<transcription factor id>:<direction name>``, the transcription
        factor is looked up in `objects`, and the newly created instance is
        registered in `objects` under its serialized form.

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model

        Returns:
            :obj:`tuple` of `TranscriptionFactorRegulation`, `InvalidAttribute` or `None`:
                tuple of cleaned value and cleaning error
        """
        # Reuse a previously deserialized instance when one exists.
        if cls in objects:
            registered = objects[cls]
            if value in registered:
                return (registered[value], None)

        # Strip the leading/trailing anchor characters from the id pattern so
        # it can be embedded in the combined expression below.
        tf_id_pattern = ProteinSpeciesType.id.pattern[1:-1]
        direction_pattern = '|'.join(
            member.name for member in RegulatoryDirection)
        regex = r'^({}) *\: *({})$'.format(tf_id_pattern, direction_pattern)
        parsed = re.match(regex, value, flags=re.I)

        if not parsed:
            return (None,
                    obj_tables.InvalidAttribute(
                        cls, ['Invalid transcription factor regulation']))

        problems = []

        # Resolve the transcription factor among the known proteins.
        tf_key = parsed.group(1)
        if ProteinSpeciesType in objects:
            known_proteins = objects[ProteinSpeciesType]
        else:
            known_proteins = {}
        if tf_key in known_proteins:
            transcription_factor = known_proteins[tf_key]
        else:
            problems.append(
                'Undefined transcription factor "{}"'.format(tf_key))

        # The direction is the last group of the expression; `lastindex` is
        # used because the id pattern may contribute groups of its own.
        direction_key = parsed.group(parsed.lastindex)
        direction = next(
            (member for member in RegulatoryDirection
             if member.name == direction_key),
            None)
        if direction is None:
            problems.append('Undefined regulatory direction "{}"'.format(
                direction_key))

        if problems:
            return (None, obj_tables.InvalidAttribute(cls, problems))

        obj = cls(transcription_factor=transcription_factor,
                  direction=direction)
        # Register the new instance so later lookups of the same value
        # return it.
        registry = objects.setdefault(cls, {})
        registry[obj.serialize()] = obj
        return (obj, None)