def test_get_empirical_formula(self):
    # Compare the computed formula of each fixture protein with its
    # hard-coded expected value.

    # MPN001
    actual = self.prot1.get_empirical_formula()
    expected = chem.EmpiricalFormula('C1980H3146N510O596S7')
    self.assertEqual(actual, expected)

    # MPN011
    actual = self.prot2.get_empirical_formula()
    expected = chem.EmpiricalFormula('C1246H1928N306O352S3')
    self.assertEqual(actual, expected)
def test_EmpiricalFormula___setitem__(self):
    # Setting coefficients through attribute access: a zero coefficient
    # leaves the formula empty, and invalid coefficients/elements raise
    # ValueError.

    def check_is_empty(formula):
        # Helper: the formula must look empty in all three representations
        self.assertEqual(formula, {})
        self.assertEqual(dict(formula), {})
        self.assertEqual(str(formula), '')

    # Assigning zero to an element that was never set
    zeroed = chem.EmpiricalFormula()
    zeroed.C = 0
    check_is_empty(zeroed)

    # Set, reset to zero, then set a fractional coefficient
    formula = chem.EmpiricalFormula()
    formula.A = 1
    self.assertEqual(formula, {'A': 1})
    formula.A = 0
    check_is_empty(formula)
    formula.A = 1.5
    self.assertEqual(formula, {'A': 1.5})

    # Non-numeric coefficient
    bad_coefficient = chem.EmpiricalFormula()
    with self.assertRaisesRegex(ValueError, 'Coefficient must be a float'):
        bad_coefficient.A = 'a'

    # Element symbol that is too long
    bad_element = chem.EmpiricalFormula()
    with self.assertRaisesRegex(ValueError,
                                'Element must be a one or two letter string'):
        bad_element.Aaa = 1
def test_get_empirical_formula(self):
    # Pre-RNA formulas for single-nucleotide genes at positions 1-4 of the
    # 'dna2' record, then for a two-nucleotide gene on the 'dna3' record.
    # (The fixture file behind self.sequence_path is not visible here.)
    chromosome = core.DnaSpeciesType(id='dna2',
                                     sequence_path=self.sequence_path)

    single_nt_formulas = (
        'C10H12N5O7P',
        'C9H12N3O8P',
        'C10H12N5O8P',
        'C9H11N2O9P',
    )
    for position, expected in enumerate(single_nt_formulas, start=1):
        locus = eukaryote_schema.GeneLocus(polymer=chromosome,
                                           start=position, end=position)
        pre_rna = eukaryote_schema.PreRnaSpeciesType(gene=locus)
        self.assertEqual(pre_rna.get_empirical_formula(),
                         chem.EmpiricalFormula(expected))

    # Gene spanning two positions
    other_chromosome = core.DnaSpeciesType(id='dna3',
                                           sequence_path=self.sequence_path)
    two_nt_locus = eukaryote_schema.GeneLocus(polymer=other_chromosome,
                                              start=1, end=2)
    two_nt_rna = eukaryote_schema.PreRnaSpeciesType(gene=two_nt_locus)
    self.assertEqual(two_nt_rna.get_empirical_formula(),
                     chem.EmpiricalFormula('C20H23N10O13P2'))
def test_get_empirical_formula(self):
    # Table-driven check of RNA formulas. Each case loads a different
    # record of the fixture FASTA file and transcribes positions 1..end.
    # (Record contents are not visible here.)
    cases = (
        # (DNA record id, end position, expected formula)
        ('dna2', 1, 'C10H12N5O7P'),
        ('dna3', 1, 'C9H12N3O8P'),
        ('dna4', 1, 'C10H12N5O8P'),
        ('dna5', 1, 'C9H11N2O9P'),
        ('dna6', 2, 'C20H23N10O13P2'),
    )
    for dna_id, end, expected in cases:
        dna = core.DnaSpeciesType(id=dna_id,
                                  sequence_path=self.sequence_path)
        unit = prokaryote.TranscriptionUnitLocus(id='tu1', polymer=dna,
                                                 start=1, end=end)
        rna = prokaryote.RnaSpeciesType(id='rna1', name='rna1',
                                        transcription_units=[unit])
        self.assertEqual(rna.get_empirical_formula(),
                         chem.EmpiricalFormula(expected))
def test_EmpiricalFormula___hash__(self):
    # Equal formulas must be found in lists, sets and dict keys;
    # different formulas must not.
    water = chem.EmpiricalFormula('H2O')
    same_water = chem.EmpiricalFormula('H2O')
    hydrogen = chem.EmpiricalFormula('H')

    # Membership with an equal formula
    self.assertIn(water, [same_water])
    self.assertIn(water, {same_water})
    self.assertIn(water, {same_water: True})

    # Membership with a different formula
    self.assertNotIn(water, [hydrogen])
    self.assertNotIn(water, {hydrogen})
    self.assertNotIn(water, {hydrogen: True})
def test_get_empirical_formula(self):
    # Formulas of the two fixture proteins, first read through the model
    # and then computed from explicitly supplied sequences.
    prot1_formula = 'C53H96N14O15S1'
    prot2_formula = 'C53H91N11O11S1'

    # Default translation table used is 1 (standard)
    self.assertEqual(self.prot1.get_empirical_formula(),
                     chem.EmpiricalFormula(prot1_formula))
    self.assertEqual(self.prot2.get_empirical_formula(),
                     chem.EmpiricalFormula(prot2_formula))

    # Test using input sequence: an otherwise empty protein object is given
    # the amino-acid sequence directly
    bare_protein = eukaryote.ProteinSpeciesType()
    from_seq1 = bare_protein.get_empirical_formula(
        seq_input=Bio.Seq.Seq('MKVLINKNEL'))
    self.assertEqual(from_seq1, chem.EmpiricalFormula(prot1_formula))
    from_seq2 = bare_protein.get_empirical_formula(
        seq_input=Bio.Seq.Seq('MKKFLLTPL'))
    self.assertEqual(from_seq2, chem.EmpiricalFormula(prot2_formula))
def get_empirical_formula(self):
    """ Get the empirical formula for an RNA transcript with

    * 5' monophosphate
    * Deprotonated phosphate oxygens

    The formula is the sum of the nucleotide monophosphates in the sequence
    minus one OH for each of the ``L - 1`` bonds between them:

    :math:`N_A * AMP + N_C * CMP + N_G * GMP + N_U * UMP - (L-1) * OH`

    Returns:
        :obj:`chem.EmpiricalFormula`: empirical formula
    """
    # (base, C, H, N, O) atoms per nucleotide; each nucleotide also
    # contributes exactly one P
    nucleotide_atoms = (
        ('A', 10, 12, 5, 7),
        ('C', 9, 12, 3, 8),
        ('G', 10, 12, 5, 8),
        ('U', 9, 11, 2, 9),
    )

    sequence = self.get_seq()
    n_bonds = len(sequence) - 1

    carbon = hydrogen = nitrogen = oxygen = phosphorus = 0
    for base, per_c, per_h, per_n, per_o in nucleotide_atoms:
        occurrences = sequence.count(base)
        carbon += per_c * occurrences
        hydrogen += per_h * occurrences
        nitrogen += per_n * occurrences
        oxygen += per_o * occurrences
        phosphorus += occurrences

    formula = chem.EmpiricalFormula()
    formula.C = carbon
    formula.H = hydrogen - n_bonds
    formula.N = nitrogen
    formula.O = oxygen - n_bonds
    formula.P = phosphorus
    return formula
def __init__(self, default=None, none_value=None, verbose_name='',
             description="A chemical formula (e.g. 'H2O', 'CO2', or 'NaCl')",
             primary=False, unique=False):
    """ Create a chemical formula attribute

    Args:
        default (:obj:`chem.EmpiricalFormula`, :obj:`dict`, :obj:`str`, or :obj:`None`, optional):
            default value; anything other than :obj:`None` or an existing
            :obj:`chem.EmpiricalFormula` is passed to the
            :obj:`chem.EmpiricalFormula` constructor
        none_value (:obj:`object`, optional): none value
        verbose_name (:obj:`str`, optional): verbose name
        description (:obj:`str`, optional): description
        primary (:obj:`bool`, optional): indicate if attribute is primary attribute
        unique (:obj:`bool`, optional): indicate if attribute value must be unique
    """
    needs_conversion = (default is not None
                        and not isinstance(default, chem.EmpiricalFormula))
    if needs_conversion:
        default = chem.EmpiricalFormula(default)

    base_kwargs = dict(
        default=default,
        none_value=none_value,
        verbose_name=verbose_name,
        description=description,
        primary=primary,
        unique=unique,
    )
    super(ChemicalFormulaAttribute, self).__init__(**base_kwargs)

    # Primary attributes accept only formulas; other attributes also accept None
    formula_type = chem.EmpiricalFormula
    self.type = formula_type if primary else (formula_type, type(None))
def get_empirical_formula(self, seq_input=None):
    """ Get the empirical formula for a transcript (spliced RNA) species with

    * 5' monophosphate
    * Deprotonated phosphate oxygens

    :math:`N_A * AMP + N_C * CMP + N_G * GMP + N_U * UMP - (L-1) * OH`

    Bases are counted case-insensitively.

    Args:
        seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method will use it
            instead of reading from fasta file to reduce IO operation

    Returns:
        :obj:`chem.EmpiricalFormula`: empirical formula
    """
    # Test against None rather than truthiness: an explicitly supplied but
    # empty sequence must still be used, not silently replaced by a read
    # from the sequence file.
    seq = seq_input if seq_input is not None else self.get_seq()

    # Normalise case once instead of once per base
    seq_upper = seq.upper()
    n_a = seq_upper.count('A')
    n_c = seq_upper.count('C')
    n_g = seq_upper.count('G')
    n_u = seq_upper.count('U')

    # NOTE(review): for an empty sequence this is -1, so the H and O
    # coefficients each become +1 -- confirm whether empty sequences can occur
    n_bonds = len(seq) - 1

    formula = chem.EmpiricalFormula()
    formula.C = 10 * n_a + 9 * n_c + 10 * n_g + 9 * n_u
    formula.H = 12 * n_a + 12 * n_c + 12 * n_g + 11 * n_u - n_bonds
    formula.N = 5 * n_a + 3 * n_c + 5 * n_g + 2 * n_u
    formula.O = 7 * n_a + 8 * n_c + 8 * n_g + 9 * n_u - n_bonds
    formula.P = n_a + n_c + n_g + n_u
    return formula
def test_ComplexSpeciesType(self):
    # A freshly constructed complex has empty defaults; after adding two
    # metabolite subunits its charge, weight and formula are derived from
    # the subunits.

    # Test constructor
    complex1 = core.ComplexSpeciesType()
    expected_defaults = (
        ('region', ''),
        ('binding', ''),
        ('complex_type', ''),
        ('composition_in_uniprot', ''),
        ('formation_process', None),
        ('subunits', []),
    )
    for attr_name, default in expected_defaults:
        self.assertEqual(getattr(complex1, attr_name), default)

    cofactor1 = core.MetaboliteSpeciesType(
        id='cofactor1',
        structure='InChI=1S/C8H7NO3/c10-6-1-4-5(2-7(6)11)9-3-8(4)12/h1-2,8-9,12H,3H2')
    cofactor2 = core.MetaboliteSpeciesType(
        id='cofactor2',
        structure='InChI=1S/Zn/q+2')

    # Test adding subunit composition
    # Add subunit composition: (2) cofactor1 + (3) cofactor2 ==> complex1
    stoichiometry = ((cofactor1, 2), (cofactor2, 3))
    complex1.subunits = [
        core.SpeciesTypeCoefficient(species_type=subunit, coefficient=count)
        for subunit, count in stoichiometry
    ]

    self.assertEqual(complex1.get_charge(), 6)

    expected_mol_wt = 2 * cofactor1.get_mol_wt() + 3 * cofactor2.get_mol_wt()
    self.assertAlmostEqual(complex1.get_mol_wt(), expected_mol_wt)

    self.assertEqual(complex1.get_empirical_formula(),
                     chem.EmpiricalFormula('C16H14N2O6Zn3'))
def test_get_formula(self):
    # Parse the InChI of glycine with Open Babel and check the formula
    # extracted by chem.OpenBabelUtils.get_formula.
    gly_inchi = 'InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5)'
    gly_formula = 'C2H5NO2'

    mol = openbabel.OBMol()
    conversion = openbabel.OBConversion()
    # Both calls report failure through their return value; checking them
    # makes a missing InChI plugin or an unparsable string fail here rather
    # than as a confusing formula mismatch below.
    self.assertTrue(conversion.SetInFormat('inchi'))
    self.assertTrue(conversion.ReadString(mol, gly_inchi))

    # Use the named expected formula (previously defined but unused)
    self.assertEqual(chem.OpenBabelUtils.get_formula(mol),
                     chem.EmpiricalFormula(gly_formula))
def get_empirical_formula(self, cds=True):
    """ Get the empirical formula

    The formula is the sum of the free amino acids in the sequence minus
    one water (2 H, 1 O) for each of the ``L - 1`` peptide bonds.

    Args:
        cds (:obj:`bool`, optional): forwarded to :obj:`get_seq`

    Returns:
        :obj:`chem.EmpiricalFormula`: empirical formula
    """
    # one-letter code, (C, H, N, O, S) atoms of the free amino acid
    residue_atoms = (
        ('A', (3, 7, 1, 2, 0)),    # Ala: Alanine (C3 H7 N O2)
        ('R', (6, 14, 4, 2, 0)),   # Arg: Arginine (C6 H14 N4 O2)
        ('N', (4, 8, 2, 3, 0)),    # Asn: Asparagine (C4 H8 N2 O3)
        ('D', (4, 7, 1, 4, 0)),    # Asp: Aspartic acid (C4 H7 N O4)
        ('C', (3, 7, 1, 2, 1)),    # Cys: Cysteine (C3 H7 N O2 S)
        ('Q', (5, 10, 2, 3, 0)),   # Gln: Glutamine (C5 H10 N2 O3)
        ('E', (5, 9, 1, 4, 0)),    # Glu: Glutamic acid (C5 H9 N O4)
        ('G', (2, 5, 1, 2, 0)),    # Gly: Glycine (C2 H5 N O2)
        ('H', (6, 9, 3, 2, 0)),    # His: Histidine (C6 H9 N3 O2)
        ('I', (6, 13, 1, 2, 0)),   # Ile: Isoleucine (C6 H13 N O2)
        ('L', (6, 13, 1, 2, 0)),   # Leu: Leucine (C6 H13 N O2)
        ('K', (6, 14, 2, 2, 0)),   # Lys: Lysine (C6 H14 N2 O2)
        ('M', (5, 11, 1, 2, 1)),   # Met: Methionine (C5 H11 N O2 S)
        ('F', (9, 11, 1, 2, 0)),   # Phe: Phenylalanine (C9 H11 N O2)
        ('P', (5, 9, 1, 2, 0)),    # Pro: Proline (C5 H9 N O2)
        ('S', (3, 7, 1, 3, 0)),    # Ser: Serine (C3 H7 N O3)
        ('T', (4, 9, 1, 3, 0)),    # Thr: Threonine (C4 H9 N O3)
        ('W', (11, 12, 2, 2, 0)),  # Trp: Tryptophan (C11 H12 N2 O2)
        ('Y', (9, 11, 1, 3, 0)),   # Tyr: Tyrosine (C9 H11 N O3)
        ('V', (5, 11, 1, 2, 0)),   # Val: Valine (C5 H11 N O2)
    )

    sequence = self.get_seq(cds=cds)
    n_peptide_bonds = len(sequence) - 1

    # Accumulate the C, H, N, O, S totals over all residue types
    totals = [0, 0, 0, 0, 0]
    for code, atoms in residue_atoms:
        occurrences = sequence.count(code)
        for index, n_atoms in enumerate(atoms):
            totals[index] += n_atoms * occurrences
    carbon, hydrogen, nitrogen, oxygen, sulfur = totals

    formula = chem.EmpiricalFormula()
    formula.C = carbon
    formula.H = hydrogen - 2 * n_peptide_bonds
    formula.N = nitrogen
    formula.O = oxygen - n_peptide_bonds
    formula.S = sulfur
    return formula
def test_ComplexSpeciesType(self):
    # Build a complex of two proteins encoded on the fixture chromosome and
    # check the charge, molecular weight and formula derived from them.

    # Test constructor
    complex1 = core.ComplexSpeciesType()

    # Generate test proteins from the Mycoplasma genitalium genome
    dna1 = core.DnaSpeciesType(id='chromosome',
                               sequence_path='tests/fixtures/seq.fna')
    cell1 = core.Cell()
    dna1.cell = cell1
    cell1.knowledge_base = core.KnowledgeBase(
        translation_table=4)  # Table 4 is for mycoplasma

    def make_protein(index, **locus_kwargs):
        # Helper: create gene<index>, its transcription unit tu<index> and
        # the protein prot<index>; return the protein
        gene = prokaryote_schema.GeneLocus(
            id='gene{}'.format(index), cell=cell1, polymer=dna1,
            **locus_kwargs)
        prokaryote_schema.TranscriptionUnitLocus(
            id='tu{}'.format(index), genes=[gene], polymer=dna1)
        return prokaryote_schema.ProteinSpeciesType(
            id='prot{}'.format(index), gene=gene, cell=cell1)

    # Protein 1, MPN001
    prot1 = make_protein(1, start=692, end=1834)

    # Protein 2, MPN011
    prot2 = make_protein(2, start=12838, end=13533,
                         strand=core.PolymerStrand.negative)

    # Test adding complexation
    # Add formation reaction: (2) prot1 + (3) prot2 ==> complex1
    complex1.subunits = [
        core.SpeciesTypeCoefficient(species_type=prot1, coefficient=2),
        core.SpeciesTypeCoefficient(species_type=prot2, coefficient=3),
    ]

    self.assertEqual(complex1.get_charge(), 38)

    expected_mol_wt = 2 * prot1.get_mol_wt() + 3 * prot2.get_mol_wt()
    self.assertAlmostEqual(complex1.get_mol_wt(), expected_mol_wt)

    self.assertEqual(complex1.get_empirical_formula(),
                     chem.EmpiricalFormula('C7698H12076N1938O2248S23'))
def from_builtin(self, json):
    """ Decode a simple Python representation (dict, list, str, float, bool, None) of a value of
    the attribute that is compatible with JSON and YAML

    Args:
        json (:obj:`dict`): simple Python representation of a value of the attribute

    Returns:
        :obj:`chem.EmpiricalFormula`: decoded value of the attribute, or :obj:`None`
            when :obj:`json` is falsy (e.g. :obj:`None`, ``''`` or ``{}``)
    """
    return chem.EmpiricalFormula(json) if json else None
def test_get_empirical_formula(self):
    # Transcript formulas for single-position exons 1-4 of one pre-RNA,
    # then for a transcript made of two separate exons.
    # (The fixture file behind self.sequence_path is not visible here.)
    dna1 = core.DnaSpeciesType(id='dna3', sequence_path=self.sequence_path)
    gene1 = eukaryote_schema.GeneLocus(polymer=dna1, start=1, end=4)
    rna1 = eukaryote_schema.PreRnaSpeciesType(gene=gene1)

    single_exon_formulas = (
        'C10H12N5O7P',
        'C9H12N3O8P',
        'C10H12N5O8P',
        'C9H11N2O9P',
    )
    for position, expected in enumerate(single_exon_formulas, start=1):
        exon = eukaryote_schema.ExonLocus(start=position, end=position)
        transcript = eukaryote_schema.TranscriptSpeciesType(rna=rna1,
                                                            exons=[exon])
        self.assertEqual(transcript.get_empirical_formula(),
                         chem.EmpiricalFormula(expected))

    # Two non-adjacent exons (positions 1 and 3) on a second DNA record
    dna2 = core.DnaSpeciesType(id='dna4', sequence_path=self.sequence_path)
    gene2 = eukaryote_schema.GeneLocus(polymer=dna2, start=1, end=4)
    rna2 = eukaryote_schema.PreRnaSpeciesType(gene=gene2)
    first_exon = eukaryote_schema.ExonLocus(start=1, end=1)
    second_exon = eukaryote_schema.ExonLocus(start=3, end=3)
    spliced = eukaryote_schema.TranscriptSpeciesType(
        rna=rna2, exons=[first_exon, second_exon])
    self.assertEqual(spliced.get_empirical_formula(),
                     chem.EmpiricalFormula('C20H23N10O13P2'))
def test_constructor(self):
    # Construct a metabolite from an InChI string and check the structure,
    # formula, charge and molecular weight derived from it.
    inchi = (
        'InChI=1S'
        '/C10H14N5O7P'
        '/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(22-10)1-21-23(18,19)20'
        '/h2-4,6-7,10,16-17H,1H2,(H2,11,12,13)(H2,18,19,20)'
        '/p-2/t4-,6-,7-,10-'
        '/m1'
        '/s1'
    )
    met = core.MetaboliteSpeciesType(structure=inchi)

    self.assertEqual(met.get_structure(), met.structure)

    expected_formula = chem.EmpiricalFormula('C10H12N5O7P')
    self.assertEqual(met.get_empirical_formula(), expected_formula)

    self.assertEqual(met.get_charge(), -2)
    self.assertAlmostEqual(met.get_mol_wt(), 345.20530, places=4)
def test_ComplexSpeciesType(self):
    # Build a complex of two proteins encoded on a small DNA record written
    # to a temporary FASTA file, then check the charge, molecular weight
    # and formula derived from the subunits.
    self.tmp_dirname = tempfile.mkdtemp()
    # Register the removal immediately so the directory is deleted even
    # when an assertion below fails. Previously rmtree was the last
    # statement and was skipped on failure. ignore_errors keeps the cleanup
    # harmless if the directory has already been removed elsewhere.
    self.addCleanup(shutil.rmtree, self.tmp_dirname, ignore_errors=True)

    sequence_path = os.path.join(self.tmp_dirname, 'test_seq.fasta')
    with open(sequence_path, 'w') as f:
        f.write(
            '>dna1\nTTTATGAARGTNCTCATHAAYAARAAYGARCTCTAGTTTATGAARTTYAARTTYCTCCTCACNCCNCTCTAATTT\n'
        )

    dna1 = core.DnaSpeciesType(id='dna1', sequence_path=sequence_path)

    # Protein subunit 1
    gene1 = eukaryote_schema.GeneLocus(polymer=dna1, start=1, end=36)
    rna1 = eukaryote_schema.PreRnaSpeciesType(gene=gene1)
    exon1 = eukaryote_schema.ExonLocus(start=4, end=36)
    transcript1 = eukaryote_schema.TranscriptSpeciesType(rna=rna1,
                                                         exons=[exon1])
    cds1 = eukaryote_schema.CdsLocus(start=4, end=36)
    prot1 = eukaryote_schema.ProteinSpeciesType(transcript=transcript1,
                                                coding_region=cds1)

    # Protein subunit 2
    gene2 = eukaryote_schema.GeneLocus(polymer=dna1, start=37, end=75)
    rna2 = eukaryote_schema.PreRnaSpeciesType(gene=gene2)
    exon2 = eukaryote_schema.ExonLocus(start=40, end=72)
    transcript2 = eukaryote_schema.TranscriptSpeciesType(rna=rna2,
                                                         exons=[exon2])
    cds2 = eukaryote_schema.CdsLocus(start=40, end=72)
    prot2 = eukaryote_schema.ProteinSpeciesType(transcript=transcript2,
                                                coding_region=cds2)

    # Complex formation: (2) prot1 + (3) prot2 ==> complex1
    species_coeff1 = core.SpeciesTypeCoefficient(species_type=prot1,
                                                 coefficient=2)
    species_coeff2 = core.SpeciesTypeCoefficient(species_type=prot2,
                                                 coefficient=3)
    complex1 = core.ComplexSpeciesType(
        subunits=[species_coeff1, species_coeff2])

    self.assertEqual(complex1.get_charge(), 8)
    self.assertAlmostEqual(
        complex1.get_mol_wt(),
        (2 * prot1.get_mol_wt() + 3 * prot2.get_mol_wt()))
    self.assertEqual(complex1.get_empirical_formula(),
                     chem.EmpiricalFormula('C292H492N64O66S5'))
def deserialize(self, value):
    """ Deserialize value

    A falsy value (e.g. :obj:`None` or ``''``) deserializes to :obj:`None`
    without error. Any other value is passed to the
    :obj:`chem.EmpiricalFormula` constructor, and a :obj:`ValueError`
    raised by it is reported as a cleaning error.

    Args:
        value (:obj:`str`): semantically equivalent representation

    Returns:
        :obj:`tuple`:

            * :obj:`chem.EmpiricalFormula`: cleaned value
            * :obj:`core.InvalidAttribute`: cleaning error
    """
    if not value:
        return (None, None)

    try:
        formula = chem.EmpiricalFormula(value)
    except ValueError as error:
        return (None, core.InvalidAttribute(self, [str(error)]))
    return (formula, None)
def test_EmpiricalFormula___str__(self):
    # String rendering of parsed formulas: (input text, expected str())
    cases = (
        ('H2O', 'H2O'),
        ('OH2', 'H2O'),
        ('N0OH2', 'H2O'),
        ('H2O1.1', 'H2O1.1'),
        ('H2O1.1e-3', 'H2O0.0011'),
        ('H2O1.1e+3', 'H2O1100'),
        ('H2O-1.1e+3', 'H2O-1100'),
    )
    for text, rendered in cases:
        formula = chem.EmpiricalFormula(text)
        self.assertEqual(str(formula), rendered)
def test_get_empirical_formula(self):
    # Transcript formulas for single-position exons 1-4 of one gene, for a
    # transcript made of two separate exons, and for an explicitly supplied
    # sequence. (The fixture behind self.sequence_path is not visible here.)
    dna1 = core.DnaSpeciesType(id='dna3', sequence_path=self.sequence_path)
    gene1 = eukaryote.GeneLocus(polymer=dna1, start=1, end=4)

    single_exon_formulas = (
        'C10H12N5O7P',
        'C9H12N3O8P',
        'C10H12N5O8P',
        'C9H11N2O9P',
    )
    for position, expected in enumerate(single_exon_formulas, start=1):
        exon = eukaryote.GenericLocus(start=position, end=position)
        transcript = eukaryote.TranscriptSpeciesType(gene=gene1,
                                                     exons=[exon])
        self.assertEqual(transcript.get_empirical_formula(),
                         chem.EmpiricalFormula(expected))

    # Two non-adjacent exons (positions 1 and 3) on a second DNA record
    dna2 = core.DnaSpeciesType(id='dna4', sequence_path=self.sequence_path)
    gene2 = eukaryote.GeneLocus(polymer=dna2, start=1, end=4)
    first_exon = eukaryote.GenericLocus(start=1, end=1)
    second_exon = eukaryote.GenericLocus(start=3, end=3)
    spliced = eukaryote.TranscriptSpeciesType(
        gene=gene2, exons=[first_exon, second_exon])
    self.assertEqual(spliced.get_empirical_formula(),
                     chem.EmpiricalFormula('C20H23N10O13P2'))

    # Test using input sequence
    bare_transcript = eukaryote.TranscriptSpeciesType()
    from_input = bare_transcript.get_empirical_formula(
        seq_input=Bio.Seq.Seq('AA'))
    self.assertEqual(from_input, chem.EmpiricalFormula('C20H23N10O13P2'))
def test(self):
    # Exercise ChemicalFormulaAttribute end to end: default handling,
    # deserialize/serialize, validate, validate_unique, builtin (JSON)
    # conversion and spreadsheet validation.
    new_attr = obj_tables.chem.ChemicalFormulaAttribute
    formula = chem.EmpiricalFormula

    # Defaults: none, given as a string, given as a formula object
    plain_attr = new_attr()
    primary_attr = new_attr(primary=True, unique=True)
    self.assertEqual(plain_attr.default, None)

    str_default_attr = new_attr(default='C1H1O2')
    self.assertEqual(str_default_attr.default, formula('C1H1O2'))

    obj_default_attr = new_attr(default=formula('C1H1O2'))
    self.assertEqual(obj_default_attr.default, formula('C1H1O2'))

    class Node(core.Model):
        value = obj_tables.chem.ChemicalFormulaAttribute()

    attr = Node.Meta.attributes['value']

    # deserialize
    for blank in ('', None):
        self.assertEqual(attr.deserialize(blank), (None, None))
    self.assertEqual(attr.deserialize('X'), (formula('X'), None))
    self.assertEqual(attr.deserialize('x')[0], None)
    self.assertNotEqual(attr.deserialize('x')[1], None)

    # serialize
    for blank in ('', None):
        self.assertEqual(attr.serialize(blank), '')
    self.assertEqual(attr.serialize(formula('C1HO2')), 'CHO2')

    # deserialize + serialize
    for text, round_tripped in (('', ''), (None, ''), ('CHO2', 'CHO2')):
        cleaned = attr.deserialize(text)[0]
        self.assertEqual(attr.serialize(cleaned), round_tripped)

    # validate
    node = Node()
    self.assertEqual(attr.validate(node, None), None)
    self.assertEqual(attr.validate(node, formula('C1HO2')), None)
    for invalid in ('', 'x', 1):
        self.assertNotEqual(attr.validate(node, invalid), None)

    # A primary attribute rejects None, a non-primary one accepts it
    attr2 = new_attr(primary=True)
    self.assertEqual(attr.validate(None, None), None)
    self.assertEqual(attr.validate(None, formula('C')), None)
    self.assertNotEqual(attr2.validate(None, None), None)
    self.assertEqual(attr2.validate(None, formula('C')), None)

    # validate_unique
    nodes = [Node(), Node()]
    distinct_values = [formula('CHO2'), formula('C2HO2')]
    self.assertEqual(attr.validate_unique(nodes, distinct_values), None)
    repeated_values = [formula('CHO2'), formula('C1HO2')]
    self.assertNotEqual(attr.validate_unique(nodes, repeated_values), None)

    # to/from JSON
    for blank in (None, ''):
        self.assertEqual(attr.to_builtin(blank), None)
    for text in ('CHO2', 'C1HO2'):
        self.assertEqual(attr.to_builtin(formula(text)),
                         {'C': 1, 'H': 1, 'O': 2})

    for blank in (None, ''):
        self.assertEqual(attr.from_builtin(blank), None)
    for text in ('CHO2', 'C1HO2'):
        self.assertEqual(attr.from_builtin(text), formula('CHO2'))
    for text in ('CHO2', 'C1HO2'):
        self.assertEqual(attr.from_builtin({'C': 1, 'H': 1, 'O': 2}),
                         formula(text))

    # get_xlsx_validation: only checked to run without raising
    attr.get_xlsx_validation()
    primary_attr.get_xlsx_validation()
def test_EmpiricalFormula___mul__(self):
    # Multiplying a formula by a number scales every coefficient
    water = chem.EmpiricalFormula('H2O')
    doubled = water * 2
    self.assertEqual(str(doubled), 'H4O2')
def get_empirical_formula(self, table=1, cds=True, seq_input=None):
    """ Get the empirical formula

    The formula is the sum of the free amino acids in the sequence minus
    one water (2 H, 1 O) for each peptide bond. Stop symbols (``*``) are
    not counted as residues.

    Args:
        table (:obj:`int`, optional): NCBI identifier for translation table
            (default = standard table); only used when :obj:`seq_input` is not provided
        cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS;
            only used when :obj:`seq_input` is not provided
        seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method will use it
            instead of reading from fasta file to reduce IO operation

    Returns:
        :obj:`chem.EmpiricalFormula`: empirical formula
    """
    # Test against None rather than truthiness: an explicitly supplied but
    # empty sequence must still be used, not silently replaced by a read
    # from the sequence file.
    if seq_input is not None:
        seq = seq_input
    else:
        seq = self.get_seq(table=table, cds=cds)

    # one-letter code, (C, H, N, O, S, Se) atoms of the free amino acid
    residue_atoms = (
        ('A', (3, 7, 1, 2, 0, 0)),    # Ala: Alanine (C3 H7 N O2)
        ('R', (6, 14, 4, 2, 0, 0)),   # Arg: Arginine (C6 H14 N4 O2)
        ('N', (4, 8, 2, 3, 0, 0)),    # Asn: Asparagine (C4 H8 N2 O3)
        ('D', (4, 7, 1, 4, 0, 0)),    # Asp: Aspartic acid (C4 H7 N O4)
        ('C', (3, 7, 1, 2, 1, 0)),    # Cys: Cysteine (C3 H7 N O2 S)
        ('Q', (5, 10, 2, 3, 0, 0)),   # Gln: Glutamine (C5 H10 N2 O3)
        ('E', (5, 9, 1, 4, 0, 0)),    # Glu: Glutamic acid (C5 H9 N O4)
        ('G', (2, 5, 1, 2, 0, 0)),    # Gly: Glycine (C2 H5 N O2)
        ('H', (6, 9, 3, 2, 0, 0)),    # His: Histidine (C6 H9 N3 O2)
        ('I', (6, 13, 1, 2, 0, 0)),   # Ile: Isoleucine (C6 H13 N O2)
        ('L', (6, 13, 1, 2, 0, 0)),   # Leu: Leucine (C6 H13 N O2)
        ('K', (6, 14, 2, 2, 0, 0)),   # Lys: Lysine (C6 H14 N2 O2)
        ('M', (5, 11, 1, 2, 1, 0)),   # Met: Methionine (C5 H11 N O2 S)
        ('F', (9, 11, 1, 2, 0, 0)),   # Phe: Phenylalanine (C9 H11 N O2)
        ('P', (5, 9, 1, 2, 0, 0)),    # Pro: Proline (C5 H9 N O2)
        ('S', (3, 7, 1, 3, 0, 0)),    # Ser: Serine (C3 H7 N O3)
        ('T', (4, 9, 1, 3, 0, 0)),    # Thr: Threonine (C4 H9 N O3)
        ('W', (11, 12, 2, 2, 0, 0)),  # Trp: Tryptophan (C11 H12 N2 O2)
        ('Y', (9, 11, 1, 3, 0, 0)),   # Tyr: Tyrosine (C9 H11 N O3)
        ('V', (5, 11, 1, 2, 0, 0)),   # Val: Valine (C5 H11 N O2)
        ('U', (3, 7, 1, 2, 0, 1)),    # Selcys: Selenocysteine (C3 H7 N O2 Se)
    )

    # Residue count excludes stop symbols; one water is lost per bond
    n_residues = len(seq) - seq.count('*')
    n_peptide_bonds = n_residues - 1

    # Accumulate the C, H, N, O, S, Se totals over all residue types
    totals = [0, 0, 0, 0, 0, 0]
    for code, atoms in residue_atoms:
        occurrences = seq.count(code)
        for index, n_atoms in enumerate(atoms):
            totals[index] += n_atoms * occurrences
    carbon, hydrogen, nitrogen, oxygen, sulfur, selenium = totals

    formula = chem.EmpiricalFormula()
    formula.C = carbon
    formula.H = hydrogen - 2 * n_peptide_bonds
    formula.N = nitrogen
    formula.O = oxygen - n_peptide_bonds
    formula.S = sulfur
    formula.Se = selenium
    return formula
def test_get_empirical_formula(self):
    # Compare the computed formula of each fixture protein with its
    # hard-coded expected value.
    # Default translation table used is 1 (standard)
    actual = self.prot1.get_empirical_formula()
    expected = chem.EmpiricalFormula('C53H96N14O15S1')
    self.assertEqual(actual, expected)

    actual = self.prot2.get_empirical_formula()
    expected = chem.EmpiricalFormula('C53H91N11O11S1')
    self.assertEqual(actual, expected)
def test_EmpiricalFormula___sub__(self):
    # Subtraction accepts both a formula object and a formula string
    water = chem.EmpiricalFormula('H2O')
    hydroxide = chem.EmpiricalFormula('HO')

    minus_object = water - hydroxide
    self.assertEqual(str(minus_object), 'H')

    minus_string = water - 'HO'
    self.assertEqual(str(minus_string), 'H')
def test_EmpiricalFormula_get_attr(self):
    # An element absent from the formula reads as 0 through both attribute
    # and item access
    empty = chem.EmpiricalFormula()

    via_attribute = empty.C
    self.assertEqual(via_attribute, 0)

    via_item = empty['C']
    self.assertEqual(via_item, 0)
def test_EmpiricalFormula___truediv__(self):
    # Dividing a formula by a number scales every coefficient
    peroxide_like = chem.EmpiricalFormula('H4O2')
    halved = peroxide_like / 2
    expected = chem.EmpiricalFormula({'H': 2, 'O': 1})
    self.assertEqual(halved, expected)
def test(self):
    # Check DnaSpeciesType properties (formula, charge, molecular weight)
    # for every combination of linear/circular and single/double stranded,
    # and for a sequence containing N's.
    self.tmp_dirname = tempfile.mkdtemp()
    # Register the removal immediately so the directory is deleted even
    # when an assertion below fails. Previously rmtree was the last
    # statement and was skipped on failure. ignore_errors keeps the cleanup
    # harmless if the directory has already been removed elsewhere.
    self.addCleanup(shutil.rmtree, self.tmp_dirname, ignore_errors=True)

    filepath = os.path.join(self.tmp_dirname, 'test_seq.fasta')
    with open(filepath, 'w') as f:
        f.write('>dna1\nACGTACGT\n'
                '>dna2\nACGTACGTNNNN\n')

    def expected_formula(copies, n_oh):
        # `copies` of each of the four nucleotide formulas, minus `n_oh` OH
        # groups; replaces five spelled-out copies of the same expression
        return (chem.EmpiricalFormula('C10H12N5O6P') * copies
                + chem.EmpiricalFormula('C9H12N3O7P') * copies
                + chem.EmpiricalFormula('C10H12N5O7P') * copies
                + chem.EmpiricalFormula('C10H13N2O8P') * copies
                - chem.EmpiricalFormula('OH') * n_oh)

    def expected_mol_wt(n_h):
        # Biopython's molecular weight for the current topology of `dna`,
        # minus the weight of `n_h` hydrogens
        return (
            Bio.SeqUtils.molecular_weight(
                dna.get_seq(), seq_type='DNA', circular=dna.circular,
                double_stranded=dna.double_stranded)
            - n_h * mendeleev.element('H').atomic_weight)

    dna = core.DnaSpeciesType(id='dna1', name='dna1',
                              sequence_path=filepath, circular=False,
                              double_stranded=False, ploidy=2)
    self.assertEqual(dna.id, 'dna1')
    self.assertEqual(dna.name, 'dna1')
    self.assertEqual(dna.circular, False)
    self.assertEqual(dna.double_stranded, False)
    self.assertEqual(dna.ploidy, 2)

    # Linear, single stranded: 2 of each base, L - 1 bonds
    L = dna.get_len()
    self.assertEqual(dna.get_empirical_formula(),
                     expected_formula(2, L - 1))
    self.assertEqual(dna.get_charge(), -L - 1)
    self.assertAlmostEqual(dna.get_mol_wt(), expected_mol_wt(9), places=0)

    # Make DNA circular, single stranded
    dna.circular = True
    dna.double_stranded = False
    self.assertEqual(dna.get_empirical_formula(), expected_formula(2, L))
    self.assertEqual(dna.get_charge(), -L)
    self.assertAlmostEqual(dna.get_mol_wt(), expected_mol_wt(8), places=0)

    # Make DNA linear, double stranded
    dna.circular = False
    dna.double_stranded = True
    self.assertEqual(dna.get_empirical_formula(),
                     expected_formula(2 * 2, (L - 1) * 2))
    self.assertEqual(dna.get_charge(), 2 * (-L - 1))
    self.assertAlmostEqual(dna.get_mol_wt(), expected_mol_wt(18), places=0)

    # Make DNA circular, double stranded
    dna.circular = True
    dna.double_stranded = True
    self.assertEqual(dna.get_empirical_formula(),
                     expected_formula(2 * 2, L * 2))
    self.assertEqual(dna.get_charge(), 2 * -L)
    self.assertAlmostEqual(dna.get_mol_wt(), expected_mol_wt(16), places=0)

    # If there are N's in the DNA sequence: the expected formula counts
    # 3 of each base per strand for the 12-letter record
    dna2 = core.DnaSpeciesType(id='dna2', sequence_path=filepath,
                               circular=False, double_stranded=True)
    L = dna2.get_len()
    self.assertEqual(dna2.get_empirical_formula(),
                     expected_formula(3 * 2, (L - 1) * 2))
def test_EmpiricalFormula___contains__(self):
    # Membership test on a formula: 'C' is reported as contained even
    # though water has no carbon, while the three-letter string 'Ccc' is not.
    water = chem.EmpiricalFormula('H2O')

    for symbol in ('H', 'C'):
        self.assertIn(symbol, water)

    self.assertNotIn('Ccc', water)
def test_EmpiricalFormula_constructor(self):
    # Construction from nothing, from formula strings, from mapping-like
    # objects and from another formula; invalid strings raise ValueError.

    # No argument
    empty = chem.EmpiricalFormula()
    self.assertEqual(empty, {})

    # Formula strings: (text, expected element -> coefficient mapping)
    string_cases = (
        ('H', {'H': 1}),
        ('H2', {'H': 2}),
        ('H2.5', {'H': 2.5}),
        ('H2.5e3', {'H': 2.5e3}),
        ('H-2.5e3', {'H': -2.5e3}),
        ('H2.5e+3', {'H': 2.5e3}),
        ('H2.5e-3', {'H': 2.5e-3}),
        ('He2', {'He': 2}),
        ('He-2', {'He': -2}),
        ('He-20', {'He': -20}),
        ('H2O', {'H': 2, 'O': 1}),
        ('He-20He30', {'He': 10}),  # a repeated element is summed
        ('RaRb', {'Ra': 1, 'Rb': 1}),
    )
    for text, coefficients in string_cases:
        parsed = chem.EmpiricalFormula(text)
        self.assertEqual(parsed, coefficients)

    # Mapping-like inputs
    from_attr_dict = chem.EmpiricalFormula(
        attrdict.AttrDict({'Ra': 1, 'Rb': 1}))
    self.assertEqual(from_attr_dict, {'Ra': 1, 'Rb': 1})

    from_attr_default = chem.EmpiricalFormula(
        attrdict.AttrDefault(int, {'Ra': 1, 'Rb': 1}))
    self.assertEqual(from_attr_default, {'Ra': 1, 'Rb': 1})

    # Copy of an existing formula
    copied = chem.EmpiricalFormula(chem.EmpiricalFormula('RaRb'))
    self.assertEqual(copied, {'Ra': 1, 'Rb': 1})

    # Invalid formula strings
    for invalid in ('Hee2', 'h2'):
        with self.assertRaisesRegex(ValueError, 'not a valid formula'):
            chem.EmpiricalFormula(invalid)