Ejemplo n.º 1
0
 def test_get_empirical_formula(self):
     """Check the empirical formulas reported for the two fixture proteins."""
     expected_by_protein = (
         (self.prot1, 'C1980H3146N510O596S7'),  # MPN001
         (self.prot2, 'C1246H1928N306O352S3'),  # MPN011
     )
     for protein, expected in expected_by_protein:
         self.assertEqual(protein.get_empirical_formula(),
                          chem.EmpiricalFormula(expected))
Ejemplo n.º 2
0
    def test_EmpiricalFormula___setitem__(self):
        """Exercise coefficient assignment on :obj:`chem.EmpiricalFormula`."""

        def assert_empty(candidate):
            # An empty formula compares equal to {} and renders as ''
            self.assertEqual(candidate, {})
            self.assertEqual(dict(candidate), {})
            self.assertEqual(str(candidate), '')

        # Assigning a zero coefficient to a fresh formula stores nothing
        formula = chem.EmpiricalFormula()
        formula.C = 0
        assert_empty(formula)

        # Set, reset to zero, then set a fractional coefficient
        formula = chem.EmpiricalFormula()
        formula.A = 1
        self.assertEqual(formula, {'A': 1})
        formula.A = 0
        assert_empty(formula)
        formula.A = 1.5
        self.assertEqual(formula, {'A': 1.5})

        # Non-numeric coefficients are rejected
        formula = chem.EmpiricalFormula()
        with self.assertRaisesRegex(ValueError,
                                    'Coefficient must be a float'):
            formula.A = 'a'

        # Element symbols longer than two letters are rejected
        formula = chem.EmpiricalFormula()
        with self.assertRaisesRegex(
                ValueError, 'Element must be a one or two letter string'):
            formula.Aaa = 1
Ejemplo n.º 3
0
    def test_get_empirical_formula(self):
        """Check pre-RNA empirical formulas for one- and two-nucleotide genes."""
        chromosome = core.DnaSpeciesType(id='dna2',
                                         sequence_path=self.sequence_path)

        # Single-nucleotide genes at positions 1 through 4 of the first record
        single_position_cases = (
            (1, 'C10H12N5O7P'),
            (2, 'C9H12N3O8P'),
            (3, 'C10H12N5O8P'),
            (4, 'C9H11N2O9P'),
        )
        for position, expected in single_position_cases:
            locus = eukaryote_schema.GeneLocus(polymer=chromosome,
                                               start=position,
                                               end=position)
            pre_rna = eukaryote_schema.PreRnaSpeciesType(gene=locus)
            self.assertEqual(pre_rna.get_empirical_formula(),
                             chem.EmpiricalFormula(expected))

        # Two-nucleotide gene on a second DNA record
        other_chromosome = core.DnaSpeciesType(
            id='dna3', sequence_path=self.sequence_path)
        dinucleotide_locus = eukaryote_schema.GeneLocus(
            polymer=other_chromosome, start=1, end=2)
        dinucleotide_rna = eukaryote_schema.PreRnaSpeciesType(
            gene=dinucleotide_locus)
        self.assertEqual(dinucleotide_rna.get_empirical_formula(),
                         chem.EmpiricalFormula('C20H23N10O13P2'))
Ejemplo n.º 4
0
    def test_get_empirical_formula(self):
        """Check RNA empirical formulas for short transcription units.

        Each case builds a transcription unit starting at position 1 of a
        different DNA record and compares the resulting RNA formula.
        """
        # (DNA record id, transcription unit end, expected formula)
        cases = (
            ('dna2', 1, 'C10H12N5O7P'),
            ('dna3', 1, 'C9H12N3O8P'),
            ('dna4', 1, 'C10H12N5O8P'),
            ('dna5', 1, 'C9H11N2O9P'),
            ('dna6', 2, 'C20H23N10O13P2'),
        )

        for dna_id, tu_end, expected in cases:
            dna = core.DnaSpeciesType(id=dna_id,
                                      sequence_path=self.sequence_path)
            transcription_unit = prokaryote.TranscriptionUnitLocus(
                id='tu1', polymer=dna, start=1, end=tu_end)
            rna = prokaryote.RnaSpeciesType(
                id='rna1',
                name='rna1',
                transcription_units=[transcription_unit])
            self.assertEqual(rna.get_empirical_formula(),
                             chem.EmpiricalFormula(expected))
Ejemplo n.º 5
0
    def test_EmpiricalFormula___hash__(self):
        """Check that equal formulas are found in lists, sets and dict keys."""
        water = chem.EmpiricalFormula('H2O')
        water_again = chem.EmpiricalFormula('H2O')
        hydrogen = chem.EmpiricalFormula('H')

        # An equal formula is found by equality and by hash lookup
        self.assertIn(water, [water_again])
        self.assertIn(water, {water_again})
        self.assertIn(water, {water_again: True})

        # A different formula is not
        self.assertNotIn(water, [hydrogen])
        self.assertNotIn(water, {hydrogen})
        self.assertNotIn(water, {hydrogen: True})
Ejemplo n.º 6
0
    def test_get_empirical_formula(self):
        """Check protein formulas from fixtures and from explicit sequences."""
        formula_1 = 'C53H96N14O15S1'
        formula_2 = 'C53H91N11O11S1'

        # Default translation table used is 1 (standard)
        for protein, expected in ((self.prot1, formula_1),
                                  (self.prot2, formula_2)):
            self.assertEqual(protein.get_empirical_formula(),
                             chem.EmpiricalFormula(expected))

        # Test using input sequence
        blank_protein = eukaryote.ProteinSpeciesType()
        for residues, expected in (('MKVLINKNEL', formula_1),
                                   ('MKKFLLTPL', formula_2)):
            observed = blank_protein.get_empirical_formula(
                seq_input=Bio.Seq.Seq(residues))
            self.assertEqual(observed, chem.EmpiricalFormula(expected))
Ejemplo n.º 7
0
    def get_empirical_formula(self):
        """ Get the empirical formula for an RNA transcript with

        * 5' monophosphate
        * Deprotonated phosphate oxygens

        :math:`N_A * AMP + N_C * CMP + N_G * GMP + N_U * UMP - (L-1) * OH`

        The sequence is obtained from :obj:`get_seq`. For an empty sequence the
        number of linkages is taken to be zero, so the result is an empty
        formula rather than a spurious extra ``OH``.

        Returns:
           :obj:`chem.EmpiricalFormula`: empirical formula
        """
        seq = self.get_seq()
        n_a = seq.count('A')
        n_c = seq.count('C')
        n_g = seq.count('G')
        n_u = seq.count('U')

        # A chain of L nucleotides has L - 1 linkages. Clamp at zero: with
        # L == 0 the "- (L - 1)" term would otherwise *add* one O and one H.
        n_linkages = max(len(seq) - 1, 0)

        formula = chem.EmpiricalFormula()
        formula.C = 10 * n_a + 9 * n_c + 10 * n_g + 9 * n_u
        formula.H = 12 * n_a + 12 * n_c + 12 * n_g + 11 * n_u - n_linkages
        formula.N = 5 * n_a + 3 * n_c + 5 * n_g + 2 * n_u
        formula.O = 7 * n_a + 8 * n_c + 8 * n_g + 9 * n_u - n_linkages
        formula.P = n_a + n_c + n_g + n_u

        return formula
Ejemplo n.º 8
0
    def __init__(
            self,
            default=None,
            none_value=None,
            verbose_name='',
            description="A chemical formula (e.g. 'H2O', 'CO2', or 'NaCl')",
            primary=False,
            unique=False):
        """ Create a chemical formula attribute

        A default that is neither :obj:`None` nor already a
        :obj:`chem.EmpiricalFormula` is converted with
        :obj:`chem.EmpiricalFormula` before being passed to the parent class.

        Args:
            default (:obj:`chem.EmpiricalFormula`, :obj:`dict`, :obj:`str`, or :obj:`None`, optional): default value
            none_value (:obj:`object`, optional): none value
            verbose_name (:obj:`str`, optional): verbose name
            description (:obj:`str`, optional): description
            primary (:obj:`bool`, optional): indicate if attribute is primary attribute
            unique (:obj:`bool`, optional): indicate if attribute value must be unique
        """
        needs_conversion = default is not None and not isinstance(
            default, chem.EmpiricalFormula)
        if needs_conversion:
            default = chem.EmpiricalFormula(default)

        super(ChemicalFormulaAttribute, self).__init__(
            default=default,
            none_value=none_value,
            verbose_name=verbose_name,
            description=description,
            primary=primary,
            unique=unique)

        # Non-primary attributes additionally list NoneType as an accepted type
        self.type = (chem.EmpiricalFormula if primary else
                     (chem.EmpiricalFormula, type(None)))
Ejemplo n.º 9
0
    def get_empirical_formula(self, seq_input=None):
        """ Get the empirical formula for a transcript (spliced RNA) species with

        * 5' monophosphate
        * Deprotonated phosphate oxygens

        :math:`N_A * AMP + N_C * CMP + N_G * GMP + N_U * UMP - (L-1) * OH`

        Nucleotides are counted case-insensitively. For an empty sequence the
        number of linkages is taken to be zero, so the result is an empty
        formula.

        Args:
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method will use it
                instead of reading from fasta file to reduce IO operation

        Returns:
           :obj:`chem.EmpiricalFormula`: empirical formula
        """
        # Compare against None explicitly: an empty sequence is falsy, and a
        # truthiness test would silently ignore it and fall back to get_seq()
        if seq_input is not None:
            seq = seq_input
        else:
            seq = self.get_seq()

        # Upper-case once rather than once per nucleotide
        upper_seq = seq.upper()
        n_a = upper_seq.count('A')
        n_c = upper_seq.count('C')
        n_g = upper_seq.count('G')
        n_u = upper_seq.count('U')

        # A chain of L nucleotides has L - 1 linkages. Clamp at zero: with
        # L == 0 the "- (L - 1)" term would otherwise *add* one O and one H.
        n_linkages = max(len(seq) - 1, 0)

        formula = chem.EmpiricalFormula()
        formula.C = 10 * n_a + 9 * n_c + 10 * n_g + 9 * n_u
        formula.H = 12 * n_a + 12 * n_c + 12 * n_g + 11 * n_u - n_linkages
        formula.N = 5 * n_a + 3 * n_c + 5 * n_g + 2 * n_u
        formula.O = 7 * n_a + 8 * n_c + 8 * n_g + 9 * n_u - n_linkages
        formula.P = n_a + n_c + n_g + n_u

        return formula
Ejemplo n.º 10
0
    def test_ComplexSpeciesType(self):
        """Check defaults of a new complex and the properties derived from
        its subunits once two metabolite cofactors are attached."""

        # Test constructor
        assembly = core.ComplexSpeciesType()

        for attr_name in ('region', 'binding', 'complex_type',
                          'composition_in_uniprot'):
            self.assertEqual(getattr(assembly, attr_name), '')
        self.assertEqual(assembly.formation_process, None)
        self.assertEqual(assembly.subunits, [])

        organic_cofactor = core.MetaboliteSpeciesType(
            id='cofactor1',
            structure=
            'InChI=1S/C8H7NO3/c10-6-1-4-5(2-7(6)11)9-3-8(4)12/h1-2,8-9,12H,3H2'
        )
        zinc_cofactor = core.MetaboliteSpeciesType(
            id='cofactor2', structure='InChI=1S/Zn/q+2')

        # Test adding subunit composition
        # Add subunit composition: (2) cofactor1 + (3) cofactor2 ==> complex1
        assembly.subunits = [
            core.SpeciesTypeCoefficient(species_type=organic_cofactor,
                                        coefficient=2),
            core.SpeciesTypeCoefficient(species_type=zinc_cofactor,
                                        coefficient=3),
        ]

        self.assertEqual(assembly.get_charge(), 6)
        expected_mol_wt = (2 * organic_cofactor.get_mol_wt() +
                           3 * zinc_cofactor.get_mol_wt())
        self.assertAlmostEqual(assembly.get_mol_wt(), expected_mol_wt)
        self.assertEqual(assembly.get_empirical_formula(),
                         chem.EmpiricalFormula('C16H14N2O6Zn3'))
Ejemplo n.º 11
0
 def test_get_formula(self):
     """Check the formula extracted from an Open Babel molecule for glycine."""
     gly_inchi = 'InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5)'
     gly_formula = 'C2H5NO2'

     # Parse the InChI string into an Open Babel molecule
     conversion = openbabel.OBConversion()
     conversion.SetInFormat('inchi')
     mol = openbabel.OBMol()
     conversion.ReadString(mol, gly_inchi)

     observed = chem.OpenBabelUtils.get_formula(mol)
     self.assertEqual(observed, chem.EmpiricalFormula(gly_formula))
Ejemplo n.º 12
0
    def get_empirical_formula(self, cds=True):
        """ Get the empirical formula

        The formula is the sum of the free amino acid formulas of the residues
        in the sequence, minus one water per peptide bond. Stop symbols
        (``*``) are not residues and are excluded from the residue count; an
        empty sequence yields an empty formula.

        Args:
            cds (:obj:`bool`, optional): passed through to :obj:`get_seq`

        Returns:
            :obj:`chem.EmpiricalFormula`: empirical formula
        """
        # (C, H, N, O, S) atom counts of each free amino acid
        atoms_per_residue = (
            ('A', (3, 7, 1, 2, 0)),    # Ala: Alanine (C3 H7 N O2)
            ('R', (6, 14, 4, 2, 0)),   # Arg: Arginine (C6 H14 N4 O2)
            ('N', (4, 8, 2, 3, 0)),    # Asn: Asparagine (C4 H8 N2 O3)
            ('D', (4, 7, 1, 4, 0)),    # Asp: Aspartic acid (C4 H7 N O4)
            ('C', (3, 7, 1, 2, 1)),    # Cys: Cysteine (C3 H7 N O2 S)
            ('Q', (5, 10, 2, 3, 0)),   # Gln: Glutamine (C5 H10 N2 O3)
            ('E', (5, 9, 1, 4, 0)),    # Glu: Glutamic acid (C5 H9 N O4)
            ('G', (2, 5, 1, 2, 0)),    # Gly: Glycine (C2 H5 N O2)
            ('H', (6, 9, 3, 2, 0)),    # His: Histidine (C6 H9 N3 O2)
            ('I', (6, 13, 1, 2, 0)),   # Ile: Isoleucine (C6 H13 N O2)
            ('L', (6, 13, 1, 2, 0)),   # Leu: Leucine (C6 H13 N O2)
            ('K', (6, 14, 2, 2, 0)),   # Lys: Lysine (C6 H14 N2 O2)
            ('M', (5, 11, 1, 2, 1)),   # Met: Methionine (C5 H11 N O2 S)
            ('F', (9, 11, 1, 2, 0)),   # Phe: Phenylalanine (C9 H11 N O2)
            ('P', (5, 9, 1, 2, 0)),    # Pro: Proline (C5 H9 N O2)
            ('S', (3, 7, 1, 3, 0)),    # Ser: Serine (C3 H7 N O3)
            ('T', (4, 9, 1, 3, 0)),    # Thr: Threonine (C4 H9 N O3)
            ('W', (11, 12, 2, 2, 0)),  # Trp: Tryptophan (C11 H12 N2 O2)
            ('Y', (9, 11, 1, 3, 0)),   # Tyr: Tyrosine (C9 H11 N O3)
            ('V', (5, 11, 1, 2, 0)),   # Val: Valine (C5 H11 N O2)
        )

        seq = self.get_seq(cds=cds)

        total_c = total_h = total_n = total_o = total_s = 0
        for code, (n_c, n_h, n_n, n_o, n_s) in atoms_per_residue:
            count = seq.count(code)
            total_c += n_c * count
            total_h += n_h * count
            total_n += n_n * count
            total_o += n_o * count
            total_s += n_s * count

        # Stop symbols contribute no atoms and form no peptide bonds, so they
        # must not inflate the number of waters removed.
        n_residues = len(seq) - seq.count('*')
        # Clamp at zero so an empty sequence does not *add* a water.
        n_peptide_bonds = max(n_residues - 1, 0)

        formula = chem.EmpiricalFormula()
        formula.C = total_c
        formula.H = total_h - 2 * n_peptide_bonds
        formula.N = total_n
        formula.O = total_o - n_peptide_bonds
        formula.S = total_s
        return formula
Ejemplo n.º 13
0
    def test_ComplexSpeciesType(self):
        """Check charge, molecular weight and formula of a complex assembled
        from two proteins encoded on the ``tests/fixtures/seq.fna`` chromosome."""

        # Test constructor
        assembly = core.ComplexSpeciesType()

        # Chromosome that the test proteins are read from
        chromosome = core.DnaSpeciesType(
            id='chromosome', sequence_path='tests/fixtures/seq.fna')

        cell = core.Cell()
        chromosome.cell = cell
        # Table 4 is for mycoplasma
        cell.knowledge_base = core.KnowledgeBase(translation_table=4)

        # Protein 1,  MPN001
        first_gene = prokaryote_schema.GeneLocus(
            id='gene1', cell=cell, polymer=chromosome, start=692, end=1834)
        first_tu = prokaryote_schema.TranscriptionUnitLocus(
            id='tu1', genes=[first_gene], polymer=chromosome)
        first_protein = prokaryote_schema.ProteinSpeciesType(
            id='prot1', gene=first_gene, cell=cell)

        # Protein 2, MPN011
        second_gene = prokaryote_schema.GeneLocus(
            id='gene2',
            cell=cell,
            polymer=chromosome,
            start=12838,
            end=13533,
            strand=core.PolymerStrand.negative)
        second_tu = prokaryote_schema.TranscriptionUnitLocus(
            id='tu2', genes=[second_gene], polymer=chromosome)
        second_protein = prokaryote_schema.ProteinSpeciesType(
            id='prot2', gene=second_gene, cell=cell)

        # Test adding complexation
        # Add formation reaction: (2) prot1 + (3) prot2 ==> complex1
        first_coefficient = core.SpeciesTypeCoefficient(
            species_type=first_protein, coefficient=2)
        second_coefficient = core.SpeciesTypeCoefficient(
            species_type=second_protein, coefficient=3)
        assembly.subunits = [first_coefficient, second_coefficient]

        self.assertEqual(assembly.get_charge(), 38)
        expected_mol_wt = (2 * first_protein.get_mol_wt() +
                           3 * second_protein.get_mol_wt())
        self.assertAlmostEqual(assembly.get_mol_wt(), expected_mol_wt)
        self.assertEqual(assembly.get_empirical_formula(),
                         chem.EmpiricalFormula('C7698H12076N1938O2248S23'))
Ejemplo n.º 14
0
    def from_builtin(self, json):
        """ Decode a simple Python representation (dict, list, str, float, bool, None) of a value of the attribute
        that is compatible with JSON and YAML

        Falsy input (e.g. :obj:`None`, an empty string, or an empty
        dictionary) decodes to :obj:`None`.

        Args:
            json (:obj:`dict`): simple Python representation of a value of the attribute

        Returns:
            :obj:`chem.EmpiricalFormula`: decoded value of the attribute
        """
        return chem.EmpiricalFormula(json) if json else None
Ejemplo n.º 15
0
    def test_get_empirical_formula(self):
        """Check transcript empirical formulas for single- and two-exon
        transcripts."""
        chromosome = core.DnaSpeciesType(id='dna3',
                                         sequence_path=self.sequence_path)
        locus = eukaryote_schema.GeneLocus(polymer=chromosome, start=1, end=4)
        pre_rna = eukaryote_schema.PreRnaSpeciesType(gene=locus)

        # One single-nucleotide exon per transcript, at positions 1 through 4
        single_exon_cases = (
            (1, 'C10H12N5O7P'),
            (2, 'C9H12N3O8P'),
            (3, 'C10H12N5O8P'),
            (4, 'C9H11N2O9P'),
        )
        for position, expected in single_exon_cases:
            exon = eukaryote_schema.ExonLocus(start=position, end=position)
            transcript = eukaryote_schema.TranscriptSpeciesType(
                rna=pre_rna, exons=[exon])
            self.assertEqual(transcript.get_empirical_formula(),
                             chem.EmpiricalFormula(expected))

        # Transcript spliced from two non-adjacent single-nucleotide exons
        other_chromosome = core.DnaSpeciesType(
            id='dna4', sequence_path=self.sequence_path)
        other_locus = eukaryote_schema.GeneLocus(polymer=other_chromosome,
                                                 start=1,
                                                 end=4)
        other_pre_rna = eukaryote_schema.PreRnaSpeciesType(gene=other_locus)
        first_exon = eukaryote_schema.ExonLocus(start=1, end=1)
        second_exon = eukaryote_schema.ExonLocus(start=3, end=3)
        spliced = eukaryote_schema.TranscriptSpeciesType(
            rna=other_pre_rna, exons=[first_exon, second_exon])
        self.assertEqual(spliced.get_empirical_formula(),
                         chem.EmpiricalFormula('C20H23N10O13P2'))
Ejemplo n.º 16
0
 def test_constructor(self):
     """Check structure, formula, charge and molecular weight of a
     metabolite built from an InChI string."""
     # InChI layers, joined with '/' below
     inchi_layers = [
         'InChI=1S',
         'C10H14N5O7P',
         'c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(22-10)1-21-23(18,19)20',
         'h2-4,6-7,10,16-17H,1H2,(H2,11,12,13)(H2,18,19,20)',
         'p-2',
         't4-,6-,7-,10-',
         'm1',
         's1',
     ]
     met = core.MetaboliteSpeciesType(structure='/'.join(inchi_layers))

     self.assertEqual(met.get_structure(), met.structure)
     self.assertEqual(met.get_empirical_formula(),
                      chem.EmpiricalFormula('C10H12N5O7P'))
     self.assertEqual(met.get_charge(), -2)
     self.assertAlmostEqual(met.get_mol_wt(), 345.20530, places=4)
Ejemplo n.º 17
0
    def test_ComplexSpeciesType(self):
        """Check charge, molecular weight and formula of a complex assembled
        from two proteins encoded in a temporary FASTA file.

        The temporary directory is removed even if the test fails part-way.
        """
        self.tmp_dirname = tempfile.mkdtemp()
        try:
            sequence_path = os.path.join(self.tmp_dirname, 'test_seq.fasta')
            with open(sequence_path, 'w') as f:
                f.write(
                    '>dna1\nTTTATGAARGTNCTCATHAAYAARAAYGARCTCTAGTTTATGAARTTYAARTTYCTCCTCACNCCNCTCTAATTT\n'
                )

            dna1 = core.DnaSpeciesType(id='dna1', sequence_path=sequence_path)

            # Protein subunit 1
            gene1 = eukaryote_schema.GeneLocus(polymer=dna1, start=1, end=36)
            rna1 = eukaryote_schema.PreRnaSpeciesType(gene=gene1)
            exon1 = eukaryote_schema.ExonLocus(start=4, end=36)
            transcript1 = eukaryote_schema.TranscriptSpeciesType(
                rna=rna1, exons=[exon1])
            cds1 = eukaryote_schema.CdsLocus(start=4, end=36)
            prot1 = eukaryote_schema.ProteinSpeciesType(
                transcript=transcript1, coding_region=cds1)

            # Protein subunit 2
            gene2 = eukaryote_schema.GeneLocus(polymer=dna1, start=37, end=75)
            rna2 = eukaryote_schema.PreRnaSpeciesType(gene=gene2)
            exon2 = eukaryote_schema.ExonLocus(start=40, end=72)
            transcript2 = eukaryote_schema.TranscriptSpeciesType(
                rna=rna2, exons=[exon2])
            cds2 = eukaryote_schema.CdsLocus(start=40, end=72)
            prot2 = eukaryote_schema.ProteinSpeciesType(
                transcript=transcript2, coding_region=cds2)

            # Complex formation: (2) prot1 + (3) prot2 ==> complex1
            species_coeff1 = core.SpeciesTypeCoefficient(species_type=prot1,
                                                         coefficient=2)
            species_coeff2 = core.SpeciesTypeCoefficient(species_type=prot2,
                                                         coefficient=3)
            complex1 = core.ComplexSpeciesType(
                subunits=[species_coeff1, species_coeff2])

            self.assertEqual(complex1.get_charge(), 8)
            self.assertAlmostEqual(
                complex1.get_mol_wt(),
                (2 * prot1.get_mol_wt() + 3 * prot2.get_mol_wt()))
            self.assertEqual(complex1.get_empirical_formula(),
                             chem.EmpiricalFormula('C292H492N64O66S5'))
        finally:
            # Without this, a failed assertion above would skip the cleanup
            # and leak the temporary directory.
            shutil.rmtree(self.tmp_dirname)
Ejemplo n.º 18
0
    def deserialize(self, value):
        """ Deserialize value

        A falsy value (e.g. :obj:`None` or an empty string) yields
        ``(None, None)``. A :obj:`ValueError` raised while constructing the
        formula is reported as a cleaning error instead of being propagated.

        Args:
            value (:obj:`str`): semantically equivalent representation

        Returns:
            :obj:`tuple`:

                * :obj:`chem.EmpiricalFormula`: cleaned value
                * :obj:`core.InvalidAttribute`: cleaning error
        """
        if not value:
            return (None, None)

        try:
            formula = chem.EmpiricalFormula(value)
        except ValueError as error:
            return (None, core.InvalidAttribute(self, [str(error)]))
        return (formula, None)
Ejemplo n.º 19
0
    def test_EmpiricalFormula___str__(self):
        """Check the string rendering of parsed formulas."""
        # (input string, expected rendering)
        cases = (
            ('H2O', 'H2O'),
            ('OH2', 'H2O'),
            ('N0OH2', 'H2O'),
            ('H2O1.1', 'H2O1.1'),
            ('H2O1.1e-3', 'H2O0.0011'),
            ('H2O1.1e+3', 'H2O1100'),
            ('H2O-1.1e+3', 'H2O-1100'),
        )
        for text, rendered in cases:
            formula = chem.EmpiricalFormula(text)
            self.assertEqual(str(formula), rendered)
Ejemplo n.º 20
0
    def test_get_empirical_formula(self):
        """Check transcript empirical formulas from gene/exon definitions and
        from an explicitly supplied sequence."""
        chromosome = core.DnaSpeciesType(id='dna3',
                                         sequence_path=self.sequence_path)
        locus = eukaryote.GeneLocus(polymer=chromosome, start=1, end=4)

        # One single-nucleotide exon per transcript, at positions 1 through 4
        single_exon_cases = (
            (1, 'C10H12N5O7P'),
            (2, 'C9H12N3O8P'),
            (3, 'C10H12N5O8P'),
            (4, 'C9H11N2O9P'),
        )
        for position, expected in single_exon_cases:
            exon = eukaryote.GenericLocus(start=position, end=position)
            transcript = eukaryote.TranscriptSpeciesType(gene=locus,
                                                         exons=[exon])
            self.assertEqual(transcript.get_empirical_formula(),
                             chem.EmpiricalFormula(expected))

        # Transcript spliced from two non-adjacent single-nucleotide exons
        other_chromosome = core.DnaSpeciesType(
            id='dna4', sequence_path=self.sequence_path)
        other_locus = eukaryote.GeneLocus(polymer=other_chromosome,
                                          start=1,
                                          end=4)
        first_exon = eukaryote.GenericLocus(start=1, end=1)
        second_exon = eukaryote.GenericLocus(start=3, end=3)
        spliced = eukaryote.TranscriptSpeciesType(
            gene=other_locus, exons=[first_exon, second_exon])
        self.assertEqual(spliced.get_empirical_formula(),
                         chem.EmpiricalFormula('C20H23N10O13P2'))

        # Test using input sequence
        blank_transcript = eukaryote.TranscriptSpeciesType()
        observed = blank_transcript.get_empirical_formula(
            seq_input=Bio.Seq.Seq('AA'))
        self.assertEqual(observed, chem.EmpiricalFormula('C20H23N10O13P2'))
Ejemplo n.º 21
0
    def test(self):
        """Exercise :obj:`obj_tables.chem.ChemicalFormulaAttribute`:
        construction defaults, (de)serialization, validation, uniqueness
        validation, conversion to/from builtin types, and XLSX validation."""
        # Construction: the default is None unless one is given, and a string
        # default is converted to an EmpiricalFormula
        attr = obj_tables.chem.ChemicalFormulaAttribute()
        primary_attr = obj_tables.chem.ChemicalFormulaAttribute(primary=True,
                                                                unique=True)
        self.assertEqual(attr.default, None)

        attr = obj_tables.chem.ChemicalFormulaAttribute(default='C1H1O2')
        self.assertEqual(attr.default, chem.EmpiricalFormula('C1H1O2'))

        attr = obj_tables.chem.ChemicalFormulaAttribute(
            default=chem.EmpiricalFormula('C1H1O2'))
        self.assertEqual(attr.default, chem.EmpiricalFormula('C1H1O2'))

        # Minimal model that owns the attribute used in the remaining checks
        class Node(core.Model):
            value = obj_tables.chem.ChemicalFormulaAttribute()

        attr = Node.Meta.attributes['value']

        # deserialize: empty input gives (None, None); 'X' parses; lower-case
        # 'x' is expected to produce no value and an error
        self.assertEqual(attr.deserialize(''), (None, None))
        self.assertEqual(attr.deserialize(None), (None, None))
        self.assertEqual(attr.deserialize('X'),
                         (chem.EmpiricalFormula('X'), None))
        self.assertEqual(attr.deserialize('x')[0], None)
        self.assertNotEqual(attr.deserialize('x')[1], None)

        # serialize: empty values become ''; coefficients of 1 are omitted
        self.assertEqual(attr.serialize(''), '')
        self.assertEqual(attr.serialize(None), '')
        self.assertEqual(attr.serialize(chem.EmpiricalFormula('C1HO2')),
                         'CHO2')

        # deserialize + serialize round trip
        self.assertEqual(attr.serialize(attr.deserialize('')[0]), '')
        self.assertEqual(attr.serialize(attr.deserialize(None)[0]), '')
        self.assertEqual(attr.serialize(attr.deserialize('CHO2')[0]), 'CHO2')

        # validate: None and EmpiricalFormula instances pass; strings and
        # numbers are expected to be reported as errors
        node = Node()
        self.assertEqual(attr.validate(node, None), None)
        self.assertEqual(attr.validate(node, chem.EmpiricalFormula('C1HO2')),
                         None)
        self.assertNotEqual(attr.validate(node, ''), None)
        self.assertNotEqual(attr.validate(node, 'x'), None)
        self.assertNotEqual(attr.validate(node, 1), None)

        # A primary attribute (attr2) is expected to reject None, whereas the
        # non-primary attribute (attr) accepts it
        attr2 = obj_tables.chem.ChemicalFormulaAttribute(primary=True)
        self.assertEqual(attr.validate(None, None), None)
        self.assertEqual(attr.validate(None, chem.EmpiricalFormula('C')), None)
        self.assertNotEqual(attr2.validate(None, None), None)
        self.assertEqual(attr2.validate(None, chem.EmpiricalFormula('C')),
                         None)

        # validate_unique: distinct formulas pass; 'CHO2' and 'C1HO2' are the
        # same formula and are expected to be reported as duplicates
        nodes = [Node(), Node()]
        self.assertEqual(
            attr.validate_unique(nodes, [
                chem.EmpiricalFormula('CHO2'),
                chem.EmpiricalFormula('C2HO2')
            ]), None)
        self.assertNotEqual(
            attr.validate_unique(nodes, [
                chem.EmpiricalFormula('CHO2'),
                chem.EmpiricalFormula('C1HO2')
            ]), None)

        # to/from JSON: empty values map to None; formulas map to and from a
        # dictionary of element coefficients (strings are accepted as input)
        self.assertEqual(attr.to_builtin(None), None)
        self.assertEqual(attr.to_builtin(''), None)
        self.assertEqual(attr.to_builtin(chem.EmpiricalFormula('CHO2')), {
            'C': 1,
            'H': 1,
            'O': 2
        })
        self.assertEqual(attr.to_builtin(chem.EmpiricalFormula('C1HO2')), {
            'C': 1,
            'H': 1,
            'O': 2
        })
        self.assertEqual(attr.from_builtin(None), None)
        self.assertEqual(attr.from_builtin(''), None)
        self.assertEqual(attr.from_builtin('CHO2'),
                         chem.EmpiricalFormula('CHO2'))
        self.assertEqual(attr.from_builtin('C1HO2'),
                         chem.EmpiricalFormula('CHO2'))
        self.assertEqual(attr.from_builtin({
            'C': 1,
            'H': 1,
            'O': 2
        }), chem.EmpiricalFormula('CHO2'))
        self.assertEqual(attr.from_builtin({
            'C': 1,
            'H': 1,
            'O': 2
        }), chem.EmpiricalFormula('C1HO2'))

        # get_xlsx_validation: only checked to run without raising, for both
        # a non-primary and a primary/unique attribute
        attr.get_xlsx_validation()
        primary_attr.get_xlsx_validation()
Ejemplo n.º 22
0
 def test_EmpiricalFormula___mul__(self):
     """Check that multiplying a formula by a scalar scales each coefficient."""
     water = chem.EmpiricalFormula('H2O')
     doubled = water * 2
     self.assertEqual(str(doubled), 'H4O2')
Ejemplo n.º 23
0
    def get_empirical_formula(self, table=1, cds=True, seq_input=None):
        """ Get the empirical formula

        The formula is the sum of the free amino acid compositions of every
        residue in the sequence, minus one water (2 H, 1 O) for each peptide
        bond. Stop symbols (``*``) are excluded from the residue count. Any
        other character that is not one of the 21 one-letter codes handled
        below contributes no atoms but is still counted as a residue when
        the number of peptide bonds is computed.

        Args:
            table (:obj:`int`, optional): NCBI identifier for translation table
                                        (default = standard table)
            cds (:obj:`bool`, optional): True indicates the sequence is a complete CDS
            seq_input (:obj:`Bio.Seq.Seq`, optional): if provided, the method will use it
                instead of reading from fasta file to reduce IO operation

        Returns:
            :obj:`chem.EmpiricalFormula`: empirical formula
        """
        elements = ('C', 'H', 'N', 'O', 'S', 'Se')

        # Atom counts of each free amino acid, in the order given by `elements`
        residue_atoms = {
            'A': (3, 7, 1, 2, 0, 0),    # Ala: Alanine (C3 H7 N O2)
            'R': (6, 14, 4, 2, 0, 0),   # Arg: Arginine (C6 H14 N4 O2)
            'N': (4, 8, 2, 3, 0, 0),    # Asn: Asparagine (C4 H8 N2 O3)
            'D': (4, 7, 1, 4, 0, 0),    # Asp: Aspartic acid (C4 H7 N O4)
            'C': (3, 7, 1, 2, 1, 0),    # Cys: Cysteine (C3 H7 N O2 S)
            'Q': (5, 10, 2, 3, 0, 0),   # Gln: Glutamine (C5 H10 N2 O3)
            'E': (5, 9, 1, 4, 0, 0),    # Glu: Glutamic acid (C5 H9 N O4)
            'G': (2, 5, 1, 2, 0, 0),    # Gly: Glycine (C2 H5 N O2)
            'H': (6, 9, 3, 2, 0, 0),    # His: Histidine (C6 H9 N3 O2)
            'I': (6, 13, 1, 2, 0, 0),   # Ile: Isoleucine (C6 H13 N O2)
            'L': (6, 13, 1, 2, 0, 0),   # Leu: Leucine (C6 H13 N O2)
            'K': (6, 14, 2, 2, 0, 0),   # Lys: Lysine (C6 H14 N2 O2)
            'M': (5, 11, 1, 2, 1, 0),   # Met: Methionine (C5 H11 N O2 S)
            'F': (9, 11, 1, 2, 0, 0),   # Phe: Phenylalanine (C9 H11 N O2)
            'P': (5, 9, 1, 2, 0, 0),    # Pro: Proline (C5 H9 N O2)
            'S': (3, 7, 1, 3, 0, 0),    # Ser: Serine (C3 H7 N O3)
            'T': (4, 9, 1, 3, 0, 0),    # Thr: Threonine (C4 H9 N O3)
            'W': (11, 12, 2, 2, 0, 0),  # Trp: Tryptophan (C11 H12 N2 O2)
            'Y': (9, 11, 1, 3, 0, 0),   # Tyr: Tyrosine (C9 H11 N O3)
            'V': (5, 11, 1, 2, 0, 0),   # Val: Valine (C5 H11 N O2)
            'U': (3, 7, 1, 2, 0, 1),    # Selcys: Selenocysteine (C3 H7 N O2 Se)
        }

        # Compare against None so that an explicitly supplied empty sequence
        # is honoured instead of silently triggering a read from the fasta file
        if seq_input is not None:
            seq = seq_input
        else:
            seq = self.get_seq(table=table, cds=cds)

        n_residues = len(seq) - seq.count('*')
        # A chain of n residues has n - 1 peptide bonds; clamp at zero so that
        # an empty sequence does not *add* a water molecule to the formula
        n_peptide_bonds = max(n_residues - 1, 0)

        totals = dict.fromkeys(elements, 0)
        for code, atoms in residue_atoms.items():
            n_code = seq.count(code)
            for element, n_atoms in zip(elements, atoms):
                totals[element] += n_atoms * n_code

        formula = chem.EmpiricalFormula()
        formula.C = totals['C']
        # Each peptide bond releases one water: 2 H and 1 O
        formula.H = totals['H'] - 2 * n_peptide_bonds
        formula.N = totals['N']
        formula.O = totals['O'] - n_peptide_bonds
        formula.S = totals['S']
        formula.Se = totals['Se']

        return formula
Ejemplo n.º 24
0
 def test_get_empirical_formula(self):
     """ Check the formulas computed for the two protein fixtures """
     # Default translation table used is 1 (standard)
     formula_1 = self.prot1.get_empirical_formula()
     self.assertEqual(formula_1, chem.EmpiricalFormula('C53H96N14O15S1'))

     formula_2 = self.prot2.get_empirical_formula()
     self.assertEqual(formula_2, chem.EmpiricalFormula('C53H91N11O11S1'))
Ejemplo n.º 25
0
 def test_EmpiricalFormula___sub__(self):
     """ Subtraction accepts either a formula object or a formula string """
     water = chem.EmpiricalFormula('H2O')
     hydroxyl = chem.EmpiricalFormula('HO')
     for subtrahend in (hydroxyl, 'HO'):
         difference = water - subtrahend
         self.assertEqual(str(difference), 'H')
Ejemplo n.º 26
0
 def test_EmpiricalFormula_get_attr(self):
     """ An element absent from the formula reads as 0 by attribute and by key """
     empty = chem.EmpiricalFormula()

     via_attribute = empty.C
     self.assertEqual(via_attribute, 0)

     via_key = empty['C']
     self.assertEqual(via_key, 0)
Ejemplo n.º 27
0
 def test_EmpiricalFormula___truediv__(self):
     """ Dividing a formula by 2 halves each coefficient """
     halved = chem.EmpiricalFormula('H4O2') / 2
     expected = chem.EmpiricalFormula({'H': 2, 'O': 1})
     self.assertEqual(halved, expected)
Ejemplo n.º 28
0
    def test(self):
        """ Check formula, charge and molecular weight of a DNA species

        A two-record fasta file is written to a temporary directory. The
        first record (``dna1``) is checked in all four combinations of
        linear/circular and single/double stranded; the second record
        (``dna2``) checks the formula when the sequence contains ``N``.
        """
        self.tmp_dirname = tempfile.mkdtemp()
        # Register removal right away so the directory is deleted even when
        # one of the assertions below fails
        self.addCleanup(shutil.rmtree, self.tmp_dirname, ignore_errors=True)

        filepath = os.path.join(self.tmp_dirname, 'test_seq.fasta')
        with open(filepath, 'w') as f:
            f.write('>dna1\nACGTACGT\n' '>dna2\nACGTACGTNNNN\n')

        def expected_formula(copies, n_links):
            """ Sum `copies` of each of the four nucleotide formulas and
            subtract `n_links` hydroxide (OH) groups """
            return (chem.EmpiricalFormula('C10H12N5O6P') * copies +
                    chem.EmpiricalFormula('C9H12N3O7P') * copies +
                    chem.EmpiricalFormula('C10H12N5O7P') * copies +
                    chem.EmpiricalFormula('C10H13N2O8P') * copies -
                    chem.EmpiricalFormula('OH') * n_links)

        def expected_mol_wt(species, n_hydrogens):
            """ Biopython molecular weight of `species` minus the weight of
            `n_hydrogens` hydrogen atoms """
            # NOTE(review): in every case below the number of hydrogens removed
            # equals the magnitude of the expected charge -- presumably this
            # accounts for deprotonation; confirm against get_mol_wt
            return Bio.SeqUtils.molecular_weight(
                species.get_seq(),
                seq_type='DNA',
                circular=species.circular,
                double_stranded=species.double_stranded) \
                - n_hydrogens * mendeleev.element('H').atomic_weight

        dna = core.DnaSpeciesType(id='dna1',
                                  name='dna1',
                                  sequence_path=filepath,
                                  circular=False,
                                  double_stranded=False,
                                  ploidy=2)

        self.assertEqual(dna.id, 'dna1')
        self.assertEqual(dna.name, 'dna1')
        self.assertEqual(dna.circular, False)
        self.assertEqual(dna.double_stranded, False)
        self.assertEqual(dna.ploidy, 2)

        # Linear, single stranded
        L = dna.get_len()
        self.assertEqual(dna.get_empirical_formula(),
                         expected_formula(2, L - 1))
        self.assertEqual(dna.get_charge(), -L - 1)
        self.assertAlmostEqual(dna.get_mol_wt(),
                               expected_mol_wt(dna, 9), places=0)

        # Make DNA circular, single stranded
        dna.circular = True
        dna.double_stranded = False

        self.assertEqual(dna.get_empirical_formula(),
                         expected_formula(2, L))
        self.assertEqual(dna.get_charge(), -L)
        self.assertAlmostEqual(dna.get_mol_wt(),
                               expected_mol_wt(dna, 8), places=0)

        # Make DNA linear, double stranded
        dna.circular = False
        dna.double_stranded = True

        self.assertEqual(dna.get_empirical_formula(),
                         expected_formula(2 * 2, (L - 1) * 2))
        self.assertEqual(dna.get_charge(), 2 * (-L - 1))
        self.assertAlmostEqual(dna.get_mol_wt(),
                               expected_mol_wt(dna, 18), places=0)

        # Make DNA circular, double stranded
        dna.circular = True
        dna.double_stranded = True

        self.assertEqual(dna.get_empirical_formula(),
                         expected_formula(2 * 2, L * 2))
        self.assertEqual(dna.get_charge(), 2 * -L)
        self.assertAlmostEqual(dna.get_mol_wt(),
                               expected_mol_wt(dna, 16), places=0)

        # If there are N's in the DNA sequence
        # NOTE(review): the expectation counts each base 3 times, i.e. the four
        # N's appear to be split evenly over A, C, G and T -- confirm
        dna2 = core.DnaSpeciesType(id='dna2',
                                   sequence_path=filepath,
                                   circular=False,
                                   double_stranded=True)

        L = dna2.get_len()
        self.assertEqual(dna2.get_empirical_formula(),
                         expected_formula(3 * 2, (L - 1) * 2))
Ejemplo n.º 29
0
 def test_EmpiricalFormula___contains__(self):
     """ Check the ``in`` operator on a formula

     'C' is expected to be reported as contained although 'H2O' has no
     carbon, while the three-letter 'Ccc' is expected to be rejected.
     """
     water = chem.EmpiricalFormula('H2O')
     for symbol in ('H', 'C'):
         self.assertIn(symbol, water)
     self.assertNotIn('Ccc', water)
Ejemplo n.º 30
0
    def test_EmpiricalFormula_constructor(self):
        """ Check construction from nothing, strings, mappings and formulas,
        and that malformed strings are rejected """
        # No argument: empty formula
        self.assertEqual(chem.EmpiricalFormula(), {})

        # Formula strings and the coefficients they should parse to
        parsed_cases = (
            ('H', {'H': 1}),
            ('H2', {'H': 2}),
            ('H2.5', {'H': 2.5}),
            ('H2.5e3', {'H': 2.5e3}),
            ('H-2.5e3', {'H': -2.5e3}),
            ('H2.5e+3', {'H': 2.5e3}),
            ('H2.5e-3', {'H': 2.5e-3}),
            ('He2', {'He': 2}),
            ('He-2', {'He': -2}),
            ('He-20', {'He': -20}),
            ('H2O', {'H': 2, 'O': 1}),
            ('He-20He30', {'He': 10}),  # repeated element: coefficients add up
            ('RaRb', {'Ra': 1, 'Rb': 1}),
        )
        for text, coefficients in parsed_cases:
            self.assertEqual(chem.EmpiricalFormula(text), coefficients)

        # Mapping-like inputs
        from_attr_dict = chem.EmpiricalFormula(
            attrdict.AttrDict({'Ra': 1, 'Rb': 1}))
        self.assertEqual(from_attr_dict, {'Ra': 1, 'Rb': 1})

        from_attr_default = chem.EmpiricalFormula(
            attrdict.AttrDefault(int, {'Ra': 1, 'Rb': 1}))
        self.assertEqual(from_attr_default, {'Ra': 1, 'Rb': 1})

        # Another formula instance
        from_formula = chem.EmpiricalFormula(chem.EmpiricalFormula('RaRb'))
        self.assertEqual(from_formula, {'Ra': 1, 'Rb': 1})

        # Malformed strings must raise
        for malformed in ('Hee2', 'h2'):
            with self.assertRaisesRegex(ValueError, 'not a valid formula'):
                chem.EmpiricalFormula(malformed)