Example #1
0
 def test_is_empty_if_no_spectra(self):
     """One molecule with a standard but no spectra: one row, zero with spectra."""
     molecule = Molecule(sum_formula="C1H2O3", name='test')
     molecule.save()
     standard = Standard(molecule=molecule)
     standard.save()

     # No FragmentationSpectrum is created here, so the count must stay at 0.
     table, with_spectra = self.get_table_and_count()
     row_total = len(table.rows)
     self.assertEqual(row_total, 1)
     self.assertEqual(with_spectra, 0)
Example #2
0
 def test_get_mz(self):
     """``get_mz`` of C1H2O3 with the nM=1, '-H', charge -1 adduct is ~60.99311."""
     molecule = Molecule(sum_formula="C1H2O3", name='test')
     molecule.save()
     adduct = Adduct(charge=-1, delta_formula='-H', nM=1)
     adduct.save()
     standard = Standard(molecule=molecule)
     standard.save()

     saved_standards = Standard.objects.all().count()
     self.assertEqual(saved_standards, 1)
     # The m/z is reached through the standard's molecule relation.
     observed_mz = standard.molecule.get_mz(adduct)
     self.assertAlmostEqual(observed_mz, 60.99311, places=4)
Example #3
0
 def test_is_not_empty_if_annotated(self):
     """A standard with one fragmentation spectrum: one row, one with spectra."""
     molecule = Molecule(sum_formula="C1H2O3", name='test')
     molecule.save()
     standard = Standard(molecule=molecule)
     standard.save()
     dataset = Dataset()
     dataset.save()
     # Attach a single spectrum to the standard.
     spectrum = FragmentationSpectrum(standard=standard, dataset=dataset, ms1_intensity=42)
     spectrum.save()

     table, with_spectra = self.get_table_and_count()
     row_total = len(table.rows)
     self.assertEqual(row_total, 1)
     self.assertEqual(with_spectra, 1)
Example #4
0
    def test_xic_and_standard_and_adduct(self):
        """Attach a standard and an adduct to an Xic, then trigger the mass check.

        After saving one dataset, adduct, molecule, standard and Xic, exactly
        one row of each counted model must exist.  Changing the Xic's m/z to
        123.993 is then expected to raise ``ValueError`` somewhere in
        ``save()`` / ``check_mass()``.
        """
        # create some datasets
        d1 = Dataset(name='Dataset1')
        d1.save()
        a1 = Adduct(nM=1, delta_formula='-H', charge=-1)
        a1.save()
        m1 = Molecule(name='TestMolecule1', sum_formula="C1H2O3")
        m1.save()
        s1 = Standard(molecule=m1, inventory_id="0")
        s1.save()
        # create some xics
        x1 = Xic(mz=60.993, dataset=d1)
        xic = [1.0, 2.0, 3.0, 4.0, 5.0]
        x1.set_xic(xic)
        x1.standard = s1
        x1.adduct = a1
        x1.save()
        self.assertEqual(Xic.objects.all().count(), 1)
        self.assertEqual(Dataset.objects.all().count(), 1)
        self.assertEqual(Standard.objects.all().count(), 1)
        # mass check
        with self.assertRaises(ValueError):
            x1.mz = 123.993
            x1.save()
            x1.check_mass()

    def test_xic_mass_filter(self):
        """Filtering by dataset and an m/z window returns only the close Xics.

        Twelve Xics are stored: nine in one dataset (three well above, three
        within a few mDa of, and three well below the reference m/z) and three
        near-reference ones in a second dataset.  Only the three near-reference
        Xics of the first dataset may survive the filter.
        """
        # NOTE(review): these Xics are saved without set_xic(); confirm the
        # model allows that.
        d1 = Dataset(name='dataset')
        d1.save()
        mz = 60.993
        tolerance = 0.01
        # three larger
        Xic(mz=mz + 5., dataset=d1).save()
        Xic(mz=mz + 10., dataset=d1).save()
        Xic(mz=mz + 15., dataset=d1).save()
        # three approx equal
        Xic(mz=mz + 0.005, dataset=d1).save()
        Xic(mz=mz + 0.0, dataset=d1).save()
        Xic(mz=mz - 0.0015, dataset=d1).save()
        # three smaller
        Xic(mz=mz - 5., dataset=d1).save()
        Xic(mz=mz - 10., dataset=d1).save()
        Xic(mz=mz - 15., dataset=d1).save()
        # three approx equal from another dataset
        d2 = Dataset(name='dataset2')
        d2.save()
        Xic(mz=mz + 0.005, dataset=d2).save()
        Xic(mz=mz + 0.0, dataset=d2).save()
        Xic(mz=mz - 0.0015, dataset=d2).save()
        self.assertEqual(Xic.objects.all().count(), 12)
        # The window is [mz - tolerance, mz + tolerance]: the lower bound goes
        # with gte and the upper bound with lte.
        xics = (
            Xic.objects.all()
            .filter(dataset=d1)
            .filter(mz__gte=mz - tolerance)
            .filter(mz__lte=mz + tolerance)
        )
        # `xics` is already a QuerySet, so count it directly.
        self.assertEqual(xics.count(), 3)
Example #5
0
 def test_add_molecule(self):
     """Saving a fully specified molecule stores one row with exact mass ~62.00039."""
     fields = dict(
         name='test_molecule',
         sum_formula='C1H2O3',
         inchi_code="str",
         solubility="none",
         hmdb_id="000235",
         chebi_id="123456",
         lipidmaps_id="558855",
         cas_id="789456",
         pubchem_id="1235",
     )
     molecule = Molecule(**fields)
     molecule.save()

     stored = Molecule.objects.all().count()
     self.assertEqual(stored, 1)
     # exact_mass is read back only after save().
     self.assertAlmostEqual(molecule.exact_mass, 62.00039, places=4)
Example #6
0
 def test_single(self):
     """With one saved standard, the '/inventory/' page lists exactly one row."""
     molecule = Molecule(sum_formula="C1H2O3", name='test')
     molecule.save()
     purchased = datetime.datetime.now()
     standard = Standard(
         molecule=molecule,
         inventory_id=0,
         vendor="sigma",
         vendor_cat="sig0001",
         lot_num="#123456",
         location="fridge",
         purchase_date=purchased,
     )
     standard.save()

     # The table is taken from the template context of the response.
     response = self.client.get('/inventory/')
     listing = response.context['standard_list']
     self.assertEqual(len(listing.rows), 1)
Example #7
0
 def test_add_standard(self):
     """A saved standard keeps inventory_id 0 and exposes its molecule's exact mass."""
     molecule = Molecule(sum_formula="C1H2O3", name='test')
     molecule.save()
     purchased = datetime.datetime.now()
     standard = Standard(
         molecule=molecule,
         inventory_id=0,
         vendor="sigma",
         vendor_cat="sig0001",
         lot_num="#123456",
         location="fridge",
         purchase_date=purchased,
     )
     standard.save()

     self.assertEqual(standard.inventory_id, 0)
     stored = Standard.objects.all().count()
     self.assertEqual(stored, 1)
     # The mass is read through the standard -> molecule relation.
     mass = standard.molecule.exact_mass
     self.assertAlmostEqual(mass, 62.00039, places=4)
Example #8
0
 def test_add_xic(self):
     """An Xic saved with ``set_xic`` is stored once and returns the same trace."""
     molecule = Molecule(sum_formula="C1H2O3", name='TestMolecule1')
     molecule.save()
     standard = Standard(inventory_id="0", molecule=molecule)
     standard.save()
     adduct = Adduct(charge=-2, delta_formula='+H+K', nM=1)
     adduct.save()
     dataset = Dataset(name='Dataset1')
     dataset.save()
     # Register the standard and the adduct on the dataset.
     dataset.standards_present.add(standard)
     dataset.adducts_present.add(adduct)

     intensities = [1.0, 2.0, 3.0, 4.0, 5.0]
     trace = Xic(dataset=dataset, mz='0.0')
     trace.set_xic(intensities)
     trace.save()

     stored = Xic.objects.all().count()
     self.assertEqual(stored, 1)
     np.testing.assert_array_almost_equal(intensities, trace.xic)
Example #9
0
 def test_clean_db(self):
     """After ``clear_molecules_without_standard`` only one molecule remains."""
     # Two molecules are saved, but only the first gets a standard.
     with_standard = Molecule(sum_formula="C1H2O3", name="TestMolecule1")
     with_standard.save()
     orphan = Molecule(sum_formula="C2H2O3", name="TestMolecule2")
     orphan.save()
     standard = Standard(inventory_id="0", molecule=with_standard)
     standard.save()

     clear_molecules_without_standard()

     remaining = Molecule.objects.all().count()
     self.assertEqual(remaining, 1)
Example #10
0
def load_molecules(filename: str, dbname='#'):
    '''Load molecules from a FASTA file.

    The file is split on '>'; text before the first '>' is discarded.  For
    each record the first line is the header and all remaining lines are
    joined into the sequence.  A record that has only a header line gets an
    empty sequence.

    The molecule name is the second space-separated header field when the
    header has more than two fields, otherwise the first field.  Names ending
    in N, L, Q, S, A or C are skipped (and reported on stdout).  When the
    first ':' of a name sits past index 2, the name is cut down to its first
    two ':'-separated parts.  Only the first record for a given (shortened)
    name is kept.  A record with an empty name is stored under 'None'.

    #### params:
    - filename: path of the FASTA file to read
    - dbname: value passed as `dbname` to every created Molecule

    *returns* -> dict mapping name to Molecule
    '''
    # Suffix letters that mark a record to be ignored; str.endswith accepts
    # a tuple, so build it once instead of once per record.
    ignorable_alleles = ('N', 'L', 'Q', 'S', 'A', 'C')

    molecules = {}
    with open(filename) as file:
        content = file.read()

    for record in content.split('>')[1:]:
        # partition never fails, even when the record has no newline at all.
        header, _, body = record.partition('\n')
        seq = body.replace('\n', '')

        fields = header.split(' ')
        name = fields[1] if len(fields) > 2 else fields[0]

        if name.endswith(ignorable_alleles):
            print('ignorated ', name)
            continue

        if name:
            if name.find(':') > 2:
                name = ':'.join(name.split(':', 2)[:2])

            # Test the whole name for membership, not its characters.
            if name in molecules:
                continue
        else:
            name = 'None'

        molecules[name] = Molecule(dbname=dbname, name=name, seq=seq)

    return molecules
Example #11
0
 def setUpTestData(cls):
     """Create one dataset, two molecules, three standards and three spectra.

     ``cls.m_twospectra`` is the molecule whose two standards each receive a
     spectrum; ``cls.m_onespectrum`` is the molecule with one standard and
     one spectrum.
     """
     dataset = Dataset(name='Dataset1')
     dataset.save()
     water = Molecule(sum_formula='H2O')
     water.save()
     oxygen = Molecule(sum_formula='O2')
     oxygen.save()

     # Two standards for the first molecule, one for the second.
     standards = []
     for molecule in (water, water, oxygen):
         standard = Standard(molecule=molecule)
         standard.save()
         standards.append(standard)

     # One spectrum per standard, in creation order.
     precursor_values = ('123.456', '123.45', '123.4')
     for precursor, standard in zip(precursor_values, standards):
         FragmentationSpectrum.objects.create(
             precursor_mz=precursor, spec_num=0, dataset=dataset, standard=standard
         )

     cls.m_onespectrum = oxygen
     cls.m_twospectra = water
Example #12
0
 def test_add_dataset(self):
     """A dataset linked to two standards is stored once and reports both."""
     # create standards
     first_molecule = Molecule(sum_formula="C1H2O3", name='TestMolecule1')
     first_molecule.save()
     second_molecule = Molecule(sum_formula="C2H2O3", name='TestMolecule1')
     second_molecule.save()
     first_standard = Standard(inventory_id="0", molecule=first_molecule)
     first_standard.save()
     second_standard = Standard(inventory_id="1", molecule=second_molecule)
     second_standard.save()
     # create adduct
     adduct = Adduct(charge=-2, delta_formula='+H+K', nM=1)
     adduct.save()
     # create a dataset
     dataset = Dataset(name='Dataset1')
     dataset.save()
     for standard in (first_standard, second_standard):
         dataset.standards_present.add(standard)
     dataset.adducts_present.add(adduct)

     all_datasets = Dataset.objects.all()
     self.assertEqual(all_datasets.count(), 1)
     # Re-read the dataset from the database before counting its standards.
     stored = Dataset.objects.all()[0]
     self.assertEqual(stored.standards_present.count(), 2)
Example #13
0
def add_batch_standard(metadata, csv_file):
    """
    Import standards, creating missing molecules, from a tab-separated file.

    Column names are lower-cased and their spaces replaced by underscores
    before use.  Every row is read as text and blank cells become "".  The
    columns read are: ``id``, ``name``, ``formula``, ``inchi``,
    ``solubility``, ``hmdb_id``, ``chebi_id``, ``lipidmaps_id``, ``cas_id``,
    ``pubchem_id``, ``vendor``, ``vendor_id``, ``lot_num``, ``purchase_date``
    and, when the file has it, ``location``.

    Per row:

    * A blank ``formula`` is rejected with ``ValueError``.
    * ``id`` is reduced to its digits.
    * The molecule is looked up by ``pubchem_id`` when that is non-blank,
      otherwise by case-insensitive ``name``; if none matches, a new
      Molecule is saved from the row.
    * A Standard whose ``inventory_id`` equals the cleaned id is updated in
      place; otherwise a new Standard for the molecule is created.
    * When the cleaned id is blank, the standard's primary key is used as
      its ``inventory_id``.

    A row that fails does not stop the import; the failure is logged and
    collected.

    :param metadata: not used by this function
    :param csv_file: path or file object handed to ``pandas.read_csv``
    :return: list of ``[name, exception]`` pairs, one per failed row
    """
    # `unicode` only exists on Python 2; fall back to `str` on Python 3 so
    # the text dtype request works on both.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    error_list = []
    df = pd.read_csv(csv_file, sep="\t", dtype=text_type)
    logging.info( 'I read the file')
    df.columns = [x.replace(" ", "_").lower() for x in df.columns]
    logging.info("I replaced columns")
    df = df.fillna("")
    logging.info("Shape: {}".format(df.shape))
    for row in df.iterrows():
        logging.info("row: {}".format(row))
        # iterrows yields (index, Series); bind the Series before the try so
        # the error handler can always refer to it.
        entry = row[1]
        try:
            # clean up input
            if entry['formula'] == '':
                raise ValueError('sum formula cannot be blank')

            inventory_id = ''.join(char for char in entry['id'] if char in "0123456789")

            if entry['pubchem_id'] != "":
                candidates = Molecule.objects.all().filter(pubchem_id=entry['pubchem_id'])
            else:
                candidates = Molecule.objects.all().filter(name__iexact=entry['name'])  # filter lowercase

            if candidates.exists():
                molecule = candidates[0]
            else:
                molecule = Molecule(
                    name=entry["name"],
                    sum_formula=entry["formula"],
                    inchi_code=entry["inchi"],
                    solubility=entry["solubility"],
                    hmdb_id=entry["hmdb_id"],
                    chebi_id=entry["chebi_id"],
                    lipidmaps_id=entry["lipidmaps_id"],
                    cas_id=entry["cas_id"],
                    pubchem_id=entry["pubchem_id"],
                )
                logging.info("about to save " + molecule.name)
                logging.info(molecule)
                molecule.save()
                logging.info("Successfully saved " + molecule.name)

            # Only look for an existing standard when there is an id to
            # match on; a blank id always means "create a new one".
            standard = None
            if inventory_id:
                existing = Standard.objects.all().filter(inventory_id=inventory_id)
                if existing.exists():  # standard already added, overwrite
                    standard = existing[0]
            if standard is None:
                standard = Standard(molecule=molecule)
                # Saved right away so a primary key exists for the blank-id
                # fallback below.
                standard.save()

            standard.vendor = entry["vendor"]
            standard.vendor_cat = entry["vendor_id"]
            standard.lot_num = entry["lot_num"]
            if "location" in entry:
                # Optional column: files without it are still accepted.
                standard.location = entry["location"]
            if entry["purchase_date"] != '':
                standard.purchase_date = dateutil.parser.parse(entry["purchase_date"], fuzzy=True)
            # A blank id falls back to the primary key.
            standard.inventory_id = inventory_id if inventory_id else standard.pk
            standard.save()
        except Exception as exc:
            # .get keeps the handler from raising when 'name' is missing.
            failed_name = entry.get('name', '')
            error_list.append([failed_name, exc])
            logging.warning("Failed for: {} with {}".format(failed_name, exc))

    return error_list