class RegulatoryModule(obj_tables.Model):
    """ Knowledge about regulatory modules

    Attributes:
        id (:obj:`str`): identifier
        name (:obj:`str`): name
        gene (:obj:`GeneLocus`): gene
        promoter (:obj:`str`): promoter ensembl ID
        activity (:obj:`ActivityLevel`): cell-type specific activity level
        type (:obj:`RegulationType`): type of regulation (proximal or distal)
        transcription_factor_regulation (:obj:`TranscriptionFactorRegulation`):
            transcription factor and direction of regulation
        comments (:obj:`str`): comments
        references (:obj:`list` of :obj:`Reference`): references
        identifiers (:obj:`list` of :obj:`Identifier`): identifiers
    """
    id = obj_tables.SlugAttribute(primary=True, unique=True)
    name = obj_tables.StringAttribute()
    # Many modules may point to one gene; the gene sees them as `regulatory_modules`
    gene = obj_tables.ManyToOneAttribute(GeneLocus, related_name='regulatory_modules')
    promoter = obj_tables.StringAttribute()
    activity = obj_tables.EnumAttribute(ActivityLevel)
    type = obj_tables.EnumAttribute(RegulationType)
    transcription_factor_regulation = RegDirectionAttribute(
        related_name='regulatory_modules')
    comments = obj_tables.LongStringAttribute()
    references = obj_tables.ManyToManyAttribute(
        core.Reference, related_name='regulatory_modules')
    identifiers = core.IdentifierAttribute(related_name='regulatory_modules')

    class Meta(obj_tables.Model.Meta):
        # Order in which the attributes are listed for this model
        attribute_order = ('id', 'name', 'gene', 'promoter', 'activity', 'type',
                           'transcription_factor_regulation',
                           'identifiers', 'references', 'comments')
class Reference(obj_tables.Model):
    """ Bibliographic reference

    Attributes:
        id (:obj:`str`): identifier (primary, unique)
        title (:obj:`str`): title
        authors (:obj:`str`): authors
        journal (:obj:`str`): journal
        volume (:obj:`int`): volume
        issue (:obj:`int`): issue
        start_page (:obj:`int`): start page
        end_page (:obj:`int`): end page
        pubmed_id (:obj:`int`): PubMed id
    """
    id = obj_tables.StringAttribute(primary=True, unique=True)
    title = obj_tables.LongStringAttribute()
    authors = obj_tables.LongStringAttribute()
    journal = obj_tables.StringAttribute()
    volume = obj_tables.PositiveIntegerAttribute()
    issue = obj_tables.PositiveIntegerAttribute()
    start_page = obj_tables.PositiveIntegerAttribute()
    end_page = obj_tables.PositiveIntegerAttribute()
    pubmed_id = obj_tables.PositiveIntegerAttribute(verbose_name='PubMed id')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'id',
            'title',
            'authors',
            'journal',
            'volume',
            'issue',
            'start_page',
            'end_page',
            'pubmed_id',
        )
        verbose_name = 'Reference'
        verbose_name_plural = 'References'
class ExpectedInitialValue(obj_tables.Model):
    """ Expected initial value of an attribute of a component

    Attributes:
        component (:obj:`str`): component
        attribute (:obj:`str`): attribute of the component
        expected_initial_value (:obj:`float`): expected initial value
        comment (:obj:`str`): comment
    """
    component = obj_tables.StringAttribute()
    attribute = obj_tables.StringAttribute()
    expected_initial_value = obj_tables.FloatAttribute()
    comment = obj_tables.StringAttribute()

    class Meta(obj_tables.Model.Meta):
        # Order in which the attributes are listed for this model
        attribute_order = ('component', 'attribute', 'expected_initial_value', 'comment')
class PbConfig(obj_tables.Model):
    """ Option/value pair of a ``PbConfig`` table

    Attributes:
        option (:obj:`str`): value of the ``Option`` column
        value (:obj:`str`): value of the ``Value`` column
    """
    option = obj_tables.StringAttribute(verbose_name='Option')
    value = obj_tables.StringAttribute(verbose_name='Value')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'option',
            'value',
        )
        verbose_name = 'PbConfig'
        # The plural label is the same as the singular label
        verbose_name_plural = 'PbConfig'
class SparseMatrixColumn(obj_tables.Model):
    """ Record of a ``SparseMatrixColumn`` table

    Attributes:
        column_i_d (:obj:`str`): value of the ``ColumnID`` column
        column_string (:obj:`str`): value of the ``ColumnString`` column
    """
    column_i_d = obj_tables.StringAttribute(verbose_name='ColumnID')
    column_string = obj_tables.StringAttribute(verbose_name='ColumnString')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'column_i_d',
            'column_string',
        )
        verbose_name = 'SparseMatrixColumn'
        # The plural label is the same as the singular label
        verbose_name_plural = 'SparseMatrixColumn'
class SparseMatrixRow(obj_tables.Model):
    """ Record of a ``SparseMatrixRow`` table

    Attributes:
        row_i_d (:obj:`str`): value of the ``RowID`` column
        row_string (:obj:`str`): value of the ``RowString`` column
    """
    row_i_d = obj_tables.StringAttribute(verbose_name='RowID')
    row_string = obj_tables.StringAttribute(verbose_name='RowString')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'row_i_d',
            'row_string',
        )
        verbose_name = 'SparseMatrixRow'
        # The plural label is the same as the singular label
        verbose_name_plural = 'SparseMatrixRow'
class Relation(obj_tables.Model):
    """ Record of a ``Relation`` table

    Attributes:
        comment (:obj:`str`): value of the ``Comment`` column
        reference_name (:obj:`str`): value of the ``ReferenceName`` column
        reference_pub_med (:obj:`str`): value of the ``ReferencePubMed`` column
        reference_d_o_i (:obj:`str`): value of the ``ReferenceDOI`` column
        description (:obj:`str`): value of the ``Description`` column
        i_d (:obj:`str`): value of the ``ID`` column
        from_object (:obj:`str`): value of the ``FromObject`` column
        to_object (:obj:`str`): value of the ``ToObject`` column
        is_symmetric (:obj:`bool`): value of the ``IsSymmetric`` column
        value_quantity_type (:obj:`float`): value of the ``Value:QuantityType`` column
    """
    comment = obj_tables.StringAttribute(verbose_name='Comment')
    reference_name = obj_tables.StringAttribute(verbose_name='ReferenceName')
    reference_pub_med = obj_tables.StringAttribute(
        verbose_name='ReferencePubMed')
    reference_d_o_i = obj_tables.StringAttribute(verbose_name='ReferenceDOI')
    description = obj_tables.StringAttribute(verbose_name='Description')
    # NOTE(review): `i_d` is not declared primary or unique here -- confirm that is intended
    i_d = obj_tables.StringAttribute(verbose_name='ID')
    from_object = obj_tables.StringAttribute(verbose_name='FromObject')
    to_object = obj_tables.StringAttribute(verbose_name='ToObject')
    is_symmetric = obj_tables.BooleanAttribute(verbose_name='IsSymmetric')
    value_quantity_type = obj_tables.FloatAttribute(
        verbose_name='Value:QuantityType')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'comment',
            'reference_name',
            'reference_pub_med',
            'reference_d_o_i',
            'description',
            'i_d',
            'from_object',
            'to_object',
            'is_symmetric',
            'value_quantity_type',
        )
        verbose_name = 'Relation'
        # The plural label is the same as the singular label
        verbose_name_plural = 'Relation'
class SparseMatrix(obj_tables.Model):
    """ Record of a ``SparseMatrix`` table: a row id, a column id and a value

    Attributes:
        row_i_d (:obj:`str`): value of the ``RowID`` column
        column_i_d (:obj:`str`): value of the ``ColumnID`` column
        value (:obj:`float`): value of the ``Value`` column
    """
    row_i_d = obj_tables.StringAttribute(verbose_name='RowID')
    column_i_d = obj_tables.StringAttribute(verbose_name='ColumnID')
    value = obj_tables.FloatAttribute(verbose_name='Value')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'row_i_d',
            'column_i_d',
            'value',
        )
        verbose_name = 'SparseMatrix'
        # The plural label is the same as the singular label
        verbose_name_plural = 'SparseMatrix'
class Compartment(obj_tables.Model):
    """ Compartment

    Attributes:
        id (:obj:`str`): identifier (primary, unique)
        name (:obj:`str`): name; may be :obj:`None`, which is also its default
    """
    id = obj_tables.StringAttribute(primary=True, unique=True)
    # `none=True` permits a missing name; both defaults are None rather than a string
    name = obj_tables.StringAttribute(none=True, default=None,
                                      default_cleaned_value=None)

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'id',
            'name',
        )
        verbose_name = 'Compartment'
        verbose_name_plural = 'Compartments'
class GeneLocus(core.PolymerLocus):
    """ Knowledge of a gene

    Attributes:
        symbol (:obj:`str`): symbol
        start (:obj:`int`): start coordinate
        end (:obj:`int`): end coordinate
        is_essential (:obj:`bool`): whether the gene is essential
        proteins (:obj:`ProteinSpeciesType`): protein (one-to-one; the protein
            refers back to this locus as ``gene``)
        homologs (:obj:`str`): homologs
        evidence (:obj:`list` of :obj:`core.Evidence`): evidence
        cog: ontology term restricted to the subclasses of ``WC:COG`` in ``kbOnt``;
            may be :obj:`None`
    """
    symbol = obj_tables.StringAttribute()
    start = obj_tables.IntegerAttribute()
    end = obj_tables.IntegerAttribute()
    is_essential = obj_tables.BooleanAttribute()
    # NOTE(review): plural name, but declared one-to-one -- confirm the intended cardinality
    proteins = obj_tables.OneToOneAttribute(ProteinSpeciesType, related_name='gene')
    homologs = obj_tables.LongStringAttribute()
    evidence = obj_tables.OneToManyAttribute(core.Evidence, related_name='genes')
    cog = obj_tables.sci.onto.OntoTermAttribute(
        kbOnt, terms=kbOnt['WC:COG'].subclasses(), none=True)

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Gene'
        verbose_name_plural = 'Genes'
        # Includes names not declared in this class (e.g. 'id', 'polymer');
        # presumably inherited from core.PolymerLocus -- confirm
        attribute_order = ('id', 'name', 'synonyms', 'symbol', 'polymer', 'start',
                           'end', 'cog', 'homologs', 'is_essential', 'proteins',
                           'evidence', 'identifiers', 'references', 'comments')
class Metabolite(obj_tables.Model):
    """ Metabolite

    Attributes:
        id (:obj:`str`): identifier (primary, unique)
        name (:obj:`str`): name; may be :obj:`None`, which is also its default
        formula: chemical formula (``obj_tables.chem.ChemicalFormulaAttribute``)
    """
    id = obj_tables.StringAttribute(primary=True, unique=True)
    # `none=True` permits a missing name; both defaults are None rather than a string
    name = obj_tables.StringAttribute(none=True, default=None,
                                      default_cleaned_value=None)
    formula = obj_tables.chem.ChemicalFormulaAttribute()

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'id',
            'name',
            'formula',
        )
        verbose_name = 'Metabolite'
        verbose_name_plural = 'Metabolites'
class rxnconContingencyList(obj_tables.Model):
    """ Record of a ``rxnconContingencyList`` table

    Attributes:
        u_i_d_contingency (:obj:`int`): value of the ``UID:Contingency`` column
        target (:obj:`str`): value of the ``Target`` column
        contingency (:obj:`str`): value of the ``Contingency`` column
        modifier (:obj:`str`): value of the ``Modifier`` column
        reference_identifiers_pubmed (:obj:`str`): value of the
            ``Reference:Identifiers:pubmed`` column
        quality (:obj:`str`): value of the ``Quality`` column
        comment (:obj:`str`): value of the ``Comment`` column
        internal_complex_i_d (:obj:`str`): value of the ``InternalComplexID`` column
    """
    u_i_d_contingency = obj_tables.IntegerAttribute(
        verbose_name='UID:Contingency')
    target = obj_tables.StringAttribute(verbose_name='Target')
    contingency = obj_tables.StringAttribute(verbose_name='Contingency')
    modifier = obj_tables.StringAttribute(verbose_name='Modifier')
    reference_identifiers_pubmed = obj_tables.StringAttribute(
        verbose_name='Reference:Identifiers:pubmed')
    quality = obj_tables.StringAttribute(verbose_name='Quality')
    comment = obj_tables.StringAttribute(verbose_name='Comment')
    internal_complex_i_d = obj_tables.StringAttribute(
        verbose_name='InternalComplexID')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'u_i_d_contingency',
            'target',
            'contingency',
            'modifier',
            'reference_identifiers_pubmed',
            'quality',
            'comment',
            'internal_complex_i_d',
        )
        verbose_name = 'rxnconContingencyList'
        # The plural label is the same as the singular label
        verbose_name_plural = 'rxnconContingencyList'
class Address(obj_tables.Model):
    """ Postal address

    Attributes:
        street (:obj:`str`): street (primary, unique)
        city (:obj:`str`): city
        state (:obj:`str`): state
        zip_code (:obj:`str`): zip code
        country (:obj:`str`): country
    """
    # The street is the primary key, so two addresses cannot share a street value
    street = obj_tables.StringAttribute(primary=True, unique=True,
                                        verbose_name='Street')
    city = obj_tables.StringAttribute(verbose_name='City')
    state = obj_tables.StringAttribute(verbose_name='State')
    zip_code = obj_tables.StringAttribute(verbose_name='Zip code')
    country = obj_tables.StringAttribute(verbose_name='Country')

    class Meta(obj_tables.Model.Meta):
        # Unlike most models here, this one uses the `multiple_cells` table format
        table_format = obj_tables.TableFormat.multiple_cells
        # Order in which the attributes are listed for this model
        attribute_order = (
            'street',
            'city',
            'state',
            'zip_code',
            'country',
        )
        verbose_name = 'Address'
        verbose_name_plural = 'Addresses'
class Definition(obj_tables.Model):
    """ Record of a ``Definition`` table

    Attributes:
        component_name (:obj:`str`): value of the ``ComponentName`` column
        component_type (:obj:`str`): value of the ``ComponentType`` column
        is_part_of (:obj:`str`): value of the ``IsPartOf`` column
        format (:obj:`str`): value of the ``Format`` column
        description (:obj:`str`): value of the ``Description`` column
    """
    component_name = obj_tables.StringAttribute(verbose_name='ComponentName')
    component_type = obj_tables.StringAttribute(verbose_name='ComponentType')
    is_part_of = obj_tables.StringAttribute(verbose_name='IsPartOf')
    format = obj_tables.StringAttribute(verbose_name='Format')
    description = obj_tables.StringAttribute(verbose_name='Description')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'component_name',
            'component_type',
            'is_part_of',
            'format',
            'description',
        )
        verbose_name = 'Definition'
        # The plural label is the same as the singular label
        verbose_name_plural = 'Definition'
class StoichiometricMatrix(obj_tables.Model):
    """ Record of a ``StoichiometricMatrix`` table

    Attributes:
        reaction_i_d (:obj:`str`): value of the ``ReactionID`` column
        stoichiometry (:obj:`str`): value of the ``Stoichiometry`` column
        substrate (:obj:`str`): value of the ``Substrate`` column
        product (:obj:`str`): value of the ``Product`` column
        location (:obj:`str`): value of the ``Location`` column
    """
    reaction_i_d = obj_tables.StringAttribute(verbose_name='ReactionID')
    # NOTE(review): stoichiometry is stored as a string, not a number -- confirm intended
    stoichiometry = obj_tables.StringAttribute(verbose_name='Stoichiometry')
    substrate = obj_tables.StringAttribute(verbose_name='Substrate')
    product = obj_tables.StringAttribute(verbose_name='Product')
    location = obj_tables.StringAttribute(verbose_name='Location')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'reaction_i_d',
            'stoichiometry',
            'substrate',
            'product',
            'location',
        )
        verbose_name = 'StoichiometricMatrix'
        # The plural label is the same as the singular label
        verbose_name_plural = 'StoichiometricMatrix'
class Gene(obj_tables.Model):
    """ Gene

    Attributes:
        id (:obj:`str`): identifier (primary, unique)
        symbol (:obj:`str`): symbol
        location (:obj:`Location`): location (one-to-one; the location refers
            back to this gene as ``genes``)
    """
    id = obj_tables.StringAttribute(primary=True, unique=True,
                                    verbose_name='Id')
    symbol = obj_tables.StringAttribute(verbose_name='Symbol')
    # 'Location' is referenced by name rather than by class object
    location = obj_tables.OneToOneAttribute('Location',
                                            related_name='genes',
                                            verbose_name='Location')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'id',
            'symbol',
            'location',
        )
        verbose_name = 'Gene'
        verbose_name_plural = 'Genes'
class Measurement(obj_tables.Model):
    """ Record of a ``Measurement`` table

    Attributes:
        sample (:obj:`str`): value of the ``Sample`` column
        time (:obj:`str`): value of the ``Time`` column
        unit (:obj:`str`): value of the ``Unit`` column
        value_type (:obj:`str`): value of the ``ValueType`` column
        description (:obj:`str`): value of the ``Description`` column
    """
    sample = obj_tables.StringAttribute(verbose_name='Sample')
    # NOTE(review): time is stored as a string, not a number or date -- confirm intended
    time = obj_tables.StringAttribute(verbose_name='Time')
    unit = obj_tables.StringAttribute(verbose_name='Unit')
    value_type = obj_tables.StringAttribute(verbose_name='ValueType')
    description = obj_tables.StringAttribute(verbose_name='Description')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'sample',
            'time',
            'unit',
            'value_type',
            'description',
        )
        verbose_name = 'Measurement'
        # The plural label is the same as the singular label
        verbose_name_plural = 'Measurement'
class PtmSite(core.PolymerLocus):
    """ Knowledge of protein modification sites

    Attributes:
        modified_protein (:obj:`ProteinSpeciesType`): modified protein
        type (:obj:`str`): type of modification (phosphorylation, methylation, etc...)
        modified_residue (:obj:`str`): residue name and position in protein sequence
        fractional_abundance (:obj:`float`): ratio of modified protein abundance
    """
    type = obj_tables.StringAttribute()
    # 'ProteinSpeciesType' is referenced by name; the protein sees these sites as `ptm_sites`
    modified_protein = obj_tables.ManyToOneAttribute('ProteinSpeciesType',
                                                     related_name='ptm_sites')
    modified_residue = obj_tables.StringAttribute()
    fractional_abundance = obj_tables.FloatAttribute()

    class Meta(obj_tables.Model.Meta):
        # Includes names not declared in this class (e.g. 'id', 'name');
        # presumably inherited from core.PolymerLocus -- confirm
        attribute_order = ('id', 'name', 'modified_protein', 'type',
                           'modified_residue', 'fractional_abundance',
                           'identifiers', 'references', 'comments')
class Transaction(obj_tables.Model):
    """ Stores transactions

    Attributes:
        amount (:obj:`float`): amount (declared as a positive float)
        category (:obj:`str`): category
        date: date (``obj_tables.DateAttribute``)
        payee (:obj:`str`): payee
    """
    amount = obj_tables.PositiveFloatAttribute()
    category = obj_tables.StringAttribute()
    date = obj_tables.DateAttribute()
    payee = obj_tables.StringAttribute()

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'amount',
            'category',
            'date',
            'payee',
        )
        verbose_name = 'Transaction'
        # The plural label is the same as the singular label
        verbose_name_plural = 'Transaction'
        description = 'Stores transactions'
class Person(obj_tables.Model):
    """ Person

    Attributes:
        name (:obj:`str`): name (primary, unique)
        type: one of ``'family'``, ``'friend'`` or ``'business'``
        company (:obj:`Company`): company; the company sees its people as ``employees``
        email_address (:obj:`str`): email address
        phone_number (:obj:`str`): phone number
        address (:obj:`Address`): address (one-to-one; the address refers back
            to this person as ``person``)
    """
    name = obj_tables.StringAttribute(primary=True, unique=True,
                                      verbose_name='Name')
    type = obj_tables.EnumAttribute(['family', 'friend', 'business'],
                                    verbose_name='Type')
    # 'Company' and 'Address' are referenced by name rather than by class object
    company = obj_tables.ManyToOneAttribute('Company',
                                            related_name='employees',
                                            verbose_name='Company')
    email_address = obj_tables.EmailAttribute(verbose_name='Email address')
    phone_number = obj_tables.StringAttribute(verbose_name='Phone number')
    address = obj_tables.OneToOneAttribute('Address',
                                           related_name='person',
                                           verbose_name='Address')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'name',
            'type',
            'company',
            'email_address',
            'phone_number',
            'address',
        )
        verbose_name = 'Person'
        verbose_name_plural = 'People'
class Transaction(obj_tables.Model):
    """ Stores transactions

    Attributes:
        date: date (``obj_tables.DateAttribute``)
        amount (:obj:`float`): amount
        tax_category (:obj:`str`): tax category
        payee (:obj:`str`): payee
        spending_category (:obj:`str`): spending category
    """
    date = obj_tables.DateAttribute(verbose_name='Date')
    amount = obj_tables.FloatAttribute(verbose_name='Amount')
    tax_category = obj_tables.StringAttribute(verbose_name='Tax category')
    payee = obj_tables.LongStringAttribute(verbose_name='Payee')
    spending_category = obj_tables.StringAttribute(
        verbose_name='Spending category')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'date',
            'amount',
            'tax_category',
            'payee',
            'spending_category',
        )
        verbose_name = 'Transaction'
        # The plural label is the same as the singular label
        verbose_name_plural = 'Transaction'
        description = 'Stores transactions'
class Layout(obj_tables.Model):
    """ Record of a ``Layout`` table

    Each attribute holds the value of the column named by its ``verbose_name``.
    The ``s_b_m_l_layout_*`` attributes correspond to the ``SBML:layout:*``
    columns; the x, y, width and height attributes are floats and all other
    attributes are strings.
    """
    i_d = obj_tables.StringAttribute(verbose_name='ID')
    name = obj_tables.StringAttribute(verbose_name='Name')
    s_b_m_l_layout_model_entity = obj_tables.StringAttribute(
        verbose_name='SBML:layout:modelEntity')
    s_b_m_l_layout_compartment_id = obj_tables.StringAttribute(
        verbose_name='SBML:layout:compartment:id')
    s_b_m_l_layout_reaction_id = obj_tables.StringAttribute(
        verbose_name='SBML:layout:reaction:id')
    s_b_m_l_layout_species_id = obj_tables.StringAttribute(
        verbose_name='SBML:layout:species:id')
    s_b_m_l_layout_curve_segment = obj_tables.StringAttribute(
        verbose_name='SBML:layout:curveSegment')
    s_b_m_l_layout_x = obj_tables.FloatAttribute(verbose_name='SBML:layout:X')
    s_b_m_l_layout_y = obj_tables.FloatAttribute(verbose_name='SBML:layout:Y')
    s_b_m_l_layout_width = obj_tables.FloatAttribute(
        verbose_name='SBML:layout:width')
    s_b_m_l_layout_height = obj_tables.FloatAttribute(
        verbose_name='SBML:layout:height')
    s_b_m_l_layout_text = obj_tables.StringAttribute(
        verbose_name='SBML:layout:text')
    s_b_m_l_layout_species_role = obj_tables.StringAttribute(
        verbose_name='SBML:layout:speciesRole')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'i_d',
            'name',
            's_b_m_l_layout_model_entity',
            's_b_m_l_layout_compartment_id',
            's_b_m_l_layout_reaction_id',
            's_b_m_l_layout_species_id',
            's_b_m_l_layout_curve_segment',
            's_b_m_l_layout_x',
            's_b_m_l_layout_y',
            's_b_m_l_layout_width',
            's_b_m_l_layout_height',
            's_b_m_l_layout_text',
            's_b_m_l_layout_species_role',
        )
        verbose_name = 'Layout'
        # The plural label is the same as the singular label
        verbose_name_plural = 'Layout'
class Company(obj_tables.Model):
    """ Company

    Attributes:
        name (:obj:`str`): name (primary, unique)
        url (:obj:`str`): URL
        address (:obj:`Address`): address (one-to-one; the address refers back
            to this company as ``company``)
    """
    name = obj_tables.StringAttribute(primary=True, unique=True,
                                      verbose_name='Name')
    url = obj_tables.UrlAttribute(verbose_name='URL')
    # 'Address' is referenced by name rather than by class object
    address = obj_tables.OneToOneAttribute('Address',
                                           related_name='company',
                                           verbose_name='Address')

    class Meta(obj_tables.Model.Meta):
        # Unlike most models here, this one uses the `column` table format
        table_format = obj_tables.TableFormat.column
        # Order in which the attributes are listed for this model
        attribute_order = (
            'name',
            'url',
            'address',
        )
        verbose_name = 'Company'
        verbose_name_plural = 'Companies'
class FbcObjective(obj_tables.Model):
    """ Record of a ``FbcObjective`` table

    Attributes:
        i_d (:obj:`str`): value of the ``ID`` column
        name (:obj:`str`): value of the ``Name`` column
        s_b_m_l_fbc_type (:obj:`str`): value of the ``SBML:fbc:type`` column
        s_b_m_l_fbc_active (:obj:`bool`): value of the ``SBML:fbc:active`` column
        s_b_m_l_fbc_objective (:obj:`str`): value of the ``SBML:fbc:objective`` column
        s_b_m_l_fbc_reaction (:obj:`str`): value of the ``SBML:fbc:reaction`` column
    """
    i_d = obj_tables.StringAttribute(verbose_name='ID')
    name = obj_tables.StringAttribute(verbose_name='Name')
    s_b_m_l_fbc_type = obj_tables.StringAttribute(verbose_name='SBML:fbc:type')
    s_b_m_l_fbc_active = obj_tables.BooleanAttribute(
        verbose_name='SBML:fbc:active')
    s_b_m_l_fbc_objective = obj_tables.StringAttribute(
        verbose_name='SBML:fbc:objective')
    s_b_m_l_fbc_reaction = obj_tables.StringAttribute(
        verbose_name='SBML:fbc:reaction')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'i_d',
            'name',
            's_b_m_l_fbc_type',
            's_b_m_l_fbc_active',
            's_b_m_l_fbc_objective',
            's_b_m_l_fbc_reaction',
        )
        verbose_name = 'FbcObjective'
        # The plural label is the same as the singular label
        verbose_name_plural = 'FbcObjective'
class Position(obj_tables.Model):
    """ Record of a ``Position`` table: an element and its x/y position

    Attributes:
        element (:obj:`str`): value of the ``Element`` column
        position_x (:obj:`float`): value of the ``PositionX`` column
        position_y (:obj:`float`): value of the ``PositionY`` column
    """
    element = obj_tables.StringAttribute(verbose_name='Element')
    position_x = obj_tables.FloatAttribute(verbose_name='PositionX')
    position_y = obj_tables.FloatAttribute(verbose_name='PositionY')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'element',
            'position_x',
            'position_y',
        )
        verbose_name = 'Position'
        # The plural label is the same as the singular label
        verbose_name_plural = 'Position'
class Relationship(obj_tables.Model):
    """ Record of a ``Relationship`` table

    Attributes:
        i_d (:obj:`str`): value of the ``ID`` column
        from_object (:obj:`str`): value of the ``FromObject`` column
        to_object (:obj:`str`): value of the ``ToObject`` column
        value (:obj:`int`): value of the ``Value`` column
        is_symmetric (:obj:`bool`): value of the ``IsSymmetric`` column
        sign: one of ``'+'``, ``'-'`` or ``'0'``; defaults to ``'0'``
        relation (:obj:`str`): value of the ``Relation`` column
    """
    i_d = obj_tables.StringAttribute(verbose_name='ID')
    from_object = obj_tables.StringAttribute(verbose_name='FromObject')
    to_object = obj_tables.StringAttribute(verbose_name='ToObject')
    value = obj_tables.IntegerAttribute(verbose_name='Value')
    is_symmetric = obj_tables.BooleanAttribute(verbose_name='IsSymmetric')
    sign = obj_tables.EnumAttribute(['+', '-', '0'],
                                    default='0',
                                    verbose_name='Sign')
    relation = obj_tables.StringAttribute(verbose_name='Relation')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'i_d',
            'from_object',
            'to_object',
            'value',
            'is_symmetric',
            'sign',
            'relation',
        )
        verbose_name = 'Relationship'
        # The plural label is the same as the singular label
        verbose_name_plural = 'Relationship'
class Location(obj_tables.Model):
    """ Location on a chromosome

    Attributes:
        chromosome (:obj:`str`): chromosome
        five_prime (:obj:`int`): 5' coordinate (primary, unique)
        three_prime (:obj:`int`): 3' coordinate
    """
    chromosome = obj_tables.StringAttribute(verbose_name='Chromosome')
    # NOTE(review): the 5' coordinate alone is the primary, unique key, so two
    # locations cannot share it even on different chromosomes -- confirm intended
    five_prime = obj_tables.PositiveIntegerAttribute(primary=True,
                                                     unique=True,
                                                     verbose_name='5\'')
    three_prime = obj_tables.PositiveIntegerAttribute(verbose_name='3\'')

    class Meta(obj_tables.Model.Meta):
        # Unlike most models here, this one uses the `multiple_cells` table format
        table_format = obj_tables.TableFormat.multiple_cells
        # Order in which the attributes are listed for this model
        attribute_order = (
            'chromosome',
            'five_prime',
            'three_prime',
        )
        verbose_name = 'Location'
        verbose_name_plural = 'Locations'
class GeneLocus(core.PolymerLocus):
    """ Knowledge of a gene

    Attributes:
        symbol (:obj:`str`): symbol
        homologs (:obj:`str`): homologs
        type (:obj:`GeneType`): type of gene
            (NOTE(review): no ``type`` attribute is declared in this class and it is
            not in ``Meta.attribute_order`` -- confirm whether this entry is stale)

    Related attributes:
        transcripts (:obj:`list` of :obj:`TranscriptSpeciesType`): transcripts
        regulatory_modules (:obj:`list` of :obj:`RegulatoryModule`): regulatory_modules
    """
    symbol = obj_tables.StringAttribute()
    homologs = obj_tables.LongStringAttribute()

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Gene'
        verbose_name_plural = 'Genes'
        # Includes names not declared in this class (e.g. 'polymer', 'strand', 'start',
        # 'end'); presumably inherited from core.PolymerLocus -- confirm
        attribute_order = ('id', 'name', 'synonyms', 'symbol', 'homologs', 'polymer',
                           'strand', 'start', 'end', 'identifiers', 'references',
                           'comments')
class Transcript(obj_tables.Model):
    """ Transcript

    Attributes:
        id (:obj:`str`): identifier (primary, unique)
        gene (:obj:`Gene`): gene; the gene sees its transcripts as ``transcripts``
        location (:obj:`Location`): location (one-to-one; the location refers
            back to this transcript as ``transcripts``)
    """
    id = obj_tables.StringAttribute(primary=True, unique=True,
                                    verbose_name='Id')
    # 'Gene' and 'Location' are referenced by name rather than by class object
    gene = obj_tables.ManyToOneAttribute('Gene',
                                         related_name='transcripts',
                                         verbose_name='Gene')
    location = obj_tables.OneToOneAttribute('Location',
                                            related_name='transcripts',
                                            verbose_name='Location')

    class Meta(obj_tables.Model.Meta):
        table_format = obj_tables.TableFormat.row
        # Order in which the attributes are listed for this model
        attribute_order = (
            'id',
            'gene',
            'location',
        )
        verbose_name = 'Transcript'
        verbose_name_plural = 'Transcripts'
class ProteinSpeciesType(core.PolymerSpeciesType):
    """ Knowledge of a protein monomer

    Attributes:
        uniprot (:obj:`str`): uniprot id
        transcript (:obj:`TranscriptSpeciesType`): transcript
        coding_regions (:obj:`list` of :obj:`LocusAttribute`): CDS coordinates

    Related attributes:
        transcription_factor_regulation (:obj:`list` of :obj:`TranscriptionFactorRegulation`):
            transcription factor regulation
        ptm_sites (:obj:`list` of :obj:`PtmSite`): protein modification sites
    """

    uniprot = obj_tables.StringAttribute()
    transcript = obj_tables.OneToOneAttribute(TranscriptSpeciesType, related_name='protein')
    coding_regions = LocusAttribute(related_name='proteins')

    class Meta(obj_tables.Model.Meta):
        verbose_name = 'Protein'
        verbose_name_plural = 'Proteins'
        attribute_order = ('id', 'name', 'uniprot', 'transcript', 'coding_regions',
                           'identifiers', 'references', 'comments')

    def _get_spliced_dna_seq(self):
        """ Get the coding DNA sequence: the coding regions joined in order of
        their start coordinate, reverse complemented if the gene is on the
        negative strand

        Returns:
            :obj:`Bio.Seq.Seq`: spliced coding DNA sequence

        Raises:
            :obj:`IndexError`: if the protein has no coding regions
        """
        ordered_cds = sorted(self.coding_regions, key=lambda x: x.start)

        # Fetch the span from the first to the last coding region once, then
        # slice each coding region out of it
        offset = ordered_cds[0].start
        dna_seq = self.transcript.gene.polymer.get_subseq(
            start=offset, end=ordered_cds[-1].end)

        # NOTE(review): `Bio.Alphabet` was removed in Biopython 1.78, so this
        # call requires an older Biopython -- confirm the pinned version
        spliced_dna_seq = Bio.Seq.Seq('', alphabet=Bio.Alphabet.DNAAlphabet())
        for cds in ordered_cds:
            # `end` is inclusive, hence the `+ 1` on the slice's upper bound
            spliced_dna_seq += dna_seq[cds.start - offset:cds.end - offset + 1]

        if self.transcript.gene.strand == core.PolymerStrand.negative:
            spliced_dna_seq = spliced_dna_seq.reverse_complement()

        return spliced_dna_seq

    def get_seq(self, table=1, cds=True):
        """ Get the 5' to 3' sequence

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS

        Returns:
            :obj:`Bio.Seq.Seq`: sequence
        """
        coding_rna_seq = self._get_spliced_dna_seq().transcribe()
        return coding_rna_seq.translate(table=table, cds=cds)

    def get_seq_and_start_codon(self, table=1, cds=True):
        """ Get the 5' to 3' amino acid sequence and the start codon

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS

        Returns:
            :obj:`Bio.Seq.Seq`: coding RNA sequence that will be translated
            :obj:`Bio.Seq.Seq`: amino acid sequence
            :obj:`Bio.Seq.Seq`: start codon
        """
        coding_rna_seq = self._get_spliced_dna_seq().transcribe()
        protein_seq = coding_rna_seq.translate(table=table, cds=cds)

        # Skip one codon (3 nt) for every leading stop symbol in the translation;
        # the start codon is the first codon that is not translated to '*'
        start_codon_index = 0
        for amino_acid in protein_seq:
            if amino_acid != '*':
                break
            start_codon_index += 3

        start_codon = coding_rna_seq[start_codon_index:start_codon_index + 3]

        return coding_rna_seq, protein_seq, start_codon

    def get_empirical_formula(self, table=1, cds=True, seq_input=None):
        """ Get the empirical formula

        The formula is the sum of the formulas of the free amino acids, minus
        two hydrogens and one oxygen for each of the ``l - 1`` links between
        the ``l`` residues of the sequence (stop symbols ``*`` are not counted
        as residues).

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method will use it
                instead of reading from fasta file to reduce IO operation

        Returns:
            :obj:`chem.EmpiricalFormula`: empirical formula
        """
        # NOTE(review): this is a truthiness test, so an empty `seq_input` is
        # treated the same as `None` and the sequence is computed instead
        seq = seq_input if seq_input else self.get_seq(table=table, cds=cds)

        # Atom counts of each free amino acid, keyed by one-letter code
        residue_atoms = {
            'A': {'C': 3, 'H': 7, 'N': 1, 'O': 2},             # Ala: Alanine
            'R': {'C': 6, 'H': 14, 'N': 4, 'O': 2},            # Arg: Arginine
            'N': {'C': 4, 'H': 8, 'N': 2, 'O': 3},             # Asn: Asparagine
            'D': {'C': 4, 'H': 7, 'N': 1, 'O': 4},             # Asp: Aspartic acid
            'C': {'C': 3, 'H': 7, 'N': 1, 'O': 2, 'S': 1},     # Cys: Cysteine
            'Q': {'C': 5, 'H': 10, 'N': 2, 'O': 3},            # Gln: Glutamine
            'E': {'C': 5, 'H': 9, 'N': 1, 'O': 4},             # Glu: Glutamic acid
            'G': {'C': 2, 'H': 5, 'N': 1, 'O': 2},             # Gly: Glycine
            'H': {'C': 6, 'H': 9, 'N': 3, 'O': 2},             # His: Histidine
            'I': {'C': 6, 'H': 13, 'N': 1, 'O': 2},            # Ile: Isoleucine
            'L': {'C': 6, 'H': 13, 'N': 1, 'O': 2},            # Leu: Leucine
            'K': {'C': 6, 'H': 14, 'N': 2, 'O': 2},            # Lys: Lysine
            'M': {'C': 5, 'H': 11, 'N': 1, 'O': 2, 'S': 1},    # Met: Methionine
            'F': {'C': 9, 'H': 11, 'N': 1, 'O': 2},            # Phe: Phenylalanine
            'P': {'C': 5, 'H': 9, 'N': 1, 'O': 2},             # Pro: Proline
            'S': {'C': 3, 'H': 7, 'N': 1, 'O': 3},             # Ser: Serine
            'T': {'C': 4, 'H': 9, 'N': 1, 'O': 3},             # Thr: Threonine
            'W': {'C': 11, 'H': 12, 'N': 2, 'O': 2},           # Trp: Tryptophan
            'Y': {'C': 9, 'H': 11, 'N': 1, 'O': 3},            # Tyr: Tyrosine
            'V': {'C': 5, 'H': 11, 'N': 1, 'O': 2},            # Val: Valine
            'U': {'C': 3, 'H': 7, 'N': 1, 'O': 2, 'Se': 1},    # Selcys: Selenocysteine
        }

        totals = {'C': 0, 'H': 0, 'N': 0, 'O': 0, 'S': 0, 'Se': 0}
        for residue, atoms in residue_atoms.items():
            n_residue = seq.count(residue)
            for element, n_atoms in atoms.items():
                totals[element] += n_atoms * n_residue

        # Each link between consecutive residues removes H2O; stop symbols are
        # not residues
        n_links = len(seq) - seq.count('*') - 1
        totals['H'] -= 2 * n_links
        totals['O'] -= n_links

        formula = chem.EmpiricalFormula()
        formula.C = totals['C']
        formula.H = totals['H']
        formula.N = totals['N']
        formula.O = totals['O']
        formula.S = totals['S']
        formula.Se = totals['Se']
        return formula

    def get_charge(self, table=1, cds=True, seq_input=None):
        """ Get the charge at physiological pH

        The charge is the number of R, H and K residues minus the number of
        D and E residues.

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method will use it
                instead of reading from fasta file to reduce IO operation

        Returns:
            :obj:`int`: charge
        """
        # NOTE(review): truthiness test -- an empty `seq_input` is treated as `None`
        seq = seq_input if seq_input else self.get_seq(table=table, cds=cds)

        n_positive = seq.count('R') + seq.count('H') + seq.count('K')
        n_negative = seq.count('D') + seq.count('E')

        return n_positive - n_negative

    def get_mol_wt(self, table=1, cds=True, seq_input=None):
        """ Get the molecular weight

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method will use it
                instead of reading from fasta file to reduce IO operation

        Returns:
            :obj:`float`: molecular weight
        """
        # `get_empirical_formula` itself falls back to the computed sequence
        # when `seq_input` is empty or `None`, so it can be forwarded as is
        formula = self.get_empirical_formula(table=table, cds=cds, seq_input=seq_input)
        return formula.get_molecular_weight()