コード例 #1
0
    def setUp(self):
        """
        Set up tests.

        Creates a scratch directory, pickles a small ID -> SMILES map into
        it, and builds the AssayDataParser instance used by the tests.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.map = {
            'CID645443':
            'Cc1ccc(-n2c3c(cc(C(=O)Nc4cccc(C)n4)c2=O)C(=O)CCC3)cc1',
            'CID2997889': 'CC(C)(C)C(=O)Nc1ccc(-c2cn3ccsc3n2)cc1',
            'CID2244': 'CC(=O)Oc1ccccc1C(=O)O',
            'CID2662': 'Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1',
            'CID3672': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1'
        }
        map_fd, self.map_filename = tempfile.mkstemp(dir=self.temp_dir,
                                                     suffix='.pkl')
        # mkstemp hands back an already-open OS-level descriptor; close it
        # right away so it is not leaked (the file is rewritten by name below).
        os.close(map_fd)
        write_pickle(self.map, self.map_filename)

        # use a subset of AID588342
        # note that CID 654924 is duplicated
        this_dir = os.path.dirname(os.path.realpath(__file__))
        self.data_filename = os.path.join(this_dir, 'data/test_pcba_data.csv')

        # set up parser
        # settings match PcbaParser defaults
        self.engine = AssayDataParser(self.data_filename,
                                      self.map_filename,
                                      delimiter=',',
                                      primary_key='PUBCHEM_CID',
                                      activity_key='PUBCHEM_ACTIVITY_OUTCOME',
                                      activity_value='Active',
                                      id_prefix='CID')
コード例 #2
0
    def setUp(self):
        """
        Set up tests.

        Creates a scratch directory, pickles a small ID -> SMILES map into
        it, and builds the AssayDataParser instance used by the tests.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.map = {
            "CID645443": "Cc1ccc(-n2c3c(cc(C(=O)Nc4cccc(C)n4)c2=O)C(=O)CCC3)cc1",
            "CID2997889": "CC(C)(C)C(=O)Nc1ccc(-c2cn3ccsc3n2)cc1",
            "CID2244": "CC(=O)Oc1ccccc1C(=O)O",
            "CID2662": "Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1",
            "CID3672": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
        }
        map_fd, self.map_filename = tempfile.mkstemp(
            dir=self.temp_dir, suffix=".pkl"
        )
        # mkstemp hands back an already-open OS-level descriptor; close it
        # right away so it is not leaked (the file is rewritten by name below).
        os.close(map_fd)
        write_pickle(self.map, self.map_filename)

        # use a subset of AID588342
        # note that CID 654924 is duplicated
        this_dir = os.path.dirname(os.path.realpath(__file__))
        self.data_filename = os.path.join(this_dir, "data/test_pcba_data.csv")

        # set up parser
        # settings match PcbaParser defaults
        self.engine = AssayDataParser(
            self.data_filename,
            self.map_filename,
            delimiter=",",
            primary_key="PUBCHEM_CID",
            activity_key="PUBCHEM_ACTIVITY_OUTCOME",
            activity_value="Active",
            id_prefix="CID",
        )
コード例 #3
0
class TestAssayDataParser(unittest.TestCase):
    """
    Tests for AssayDataParser.

    Each test runs against a parser built in setUp from a CSV fixture and a
    pickled ID -> SMILES map stored in a per-test temporary directory.
    """
    def setUp(self):
        """
        Set up tests.

        Creates a scratch directory, pickles a small ID -> SMILES map into
        it, and builds the AssayDataParser instance used by the tests.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.map = {
            'CID645443':
            'Cc1ccc(-n2c3c(cc(C(=O)Nc4cccc(C)n4)c2=O)C(=O)CCC3)cc1',
            'CID2997889': 'CC(C)(C)C(=O)Nc1ccc(-c2cn3ccsc3n2)cc1',
            'CID2244': 'CC(=O)Oc1ccccc1C(=O)O',
            'CID2662': 'Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1',
            'CID3672': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1'
        }
        map_fd, self.map_filename = tempfile.mkstemp(dir=self.temp_dir,
                                                     suffix='.pkl')
        # mkstemp hands back an already-open OS-level descriptor; close it
        # right away so it is not leaked and cannot block tearDown's rmtree.
        os.close(map_fd)
        write_pickle(self.map, self.map_filename)

        # use a subset of AID588342
        # note that CID 654924 is duplicated
        this_dir = os.path.dirname(os.path.realpath(__file__))
        self.data_filename = os.path.join(this_dir, 'data/test_pcba_data.csv')

        # set up parser
        # settings match PcbaParser defaults
        self.engine = AssayDataParser(self.data_filename,
                                      self.map_filename,
                                      delimiter=',',
                                      primary_key='PUBCHEM_CID',
                                      activity_key='PUBCHEM_ACTIVITY_OUTCOME',
                                      activity_value='Active',
                                      id_prefix='CID')

    def tearDown(self):
        """
        Clean up tests.

        Removes the temporary directory created in setUp, including the
        pickled map file inside it.
        """
        shutil.rmtree(self.temp_dir)

    def test_read_data(self):
        """
        Test AssayDataParser.read_data.

        Expects four rows with the listed PUBCHEM_CID values, in order.
        """
        data = self.engine.read_data()
        assert data.shape[0] == 4  # CID 654924 is duplicated
        assert np.array_equal(data.PUBCHEM_CID,
                              [645443, 645449, 654924, 2997889])

    def test_map_ids_to_smiles(self):
        """
        Test AssayDataParser.map_ids_to_smiles.

        Only two of the IDs read from the fixture are present in the map,
        so two SMILES are expected, matched to rows 0 and 3.
        """
        data = self.engine.read_data()
        id_map = read_pickle(self.map_filename)
        smiles, indices = self.engine.map_ids_to_smiles(
            data.PUBCHEM_CID, id_map)
        assert len(smiles) == len(indices) == 2
        assert smiles[0] == self.map['CID645443']
        assert smiles[1] == self.map['CID2997889']
        assert np.array_equal(indices, [0, 3])
コード例 #4
0
class TestAssayDataParser(unittest.TestCase):
    """
    Tests for AssayDataParser.

    Each test runs against a parser built in setUp from a CSV fixture and a
    pickled ID -> SMILES map stored in a per-test temporary directory.
    """

    def setUp(self):
        """
        Set up tests.

        Creates a scratch directory, pickles a small ID -> SMILES map into
        it, and builds the AssayDataParser instance used by the tests.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.map = {
            "CID645443": "Cc1ccc(-n2c3c(cc(C(=O)Nc4cccc(C)n4)c2=O)C(=O)CCC3)cc1",
            "CID2997889": "CC(C)(C)C(=O)Nc1ccc(-c2cn3ccsc3n2)cc1",
            "CID2244": "CC(=O)Oc1ccccc1C(=O)O",
            "CID2662": "Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1",
            "CID3672": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
        }
        map_fd, self.map_filename = tempfile.mkstemp(
            dir=self.temp_dir, suffix=".pkl"
        )
        # mkstemp hands back an already-open OS-level descriptor; close it
        # right away so it is not leaked and cannot block tearDown's rmtree.
        os.close(map_fd)
        write_pickle(self.map, self.map_filename)

        # use a subset of AID588342
        # note that CID 654924 is duplicated
        this_dir = os.path.dirname(os.path.realpath(__file__))
        self.data_filename = os.path.join(this_dir, "data/test_pcba_data.csv")

        # set up parser
        # settings match PcbaParser defaults
        self.engine = AssayDataParser(
            self.data_filename,
            self.map_filename,
            delimiter=",",
            primary_key="PUBCHEM_CID",
            activity_key="PUBCHEM_ACTIVITY_OUTCOME",
            activity_value="Active",
            id_prefix="CID",
        )

    def tearDown(self):
        """
        Clean up tests.

        Removes the temporary directory created in setUp, including the
        pickled map file inside it.
        """
        shutil.rmtree(self.temp_dir)

    def test_read_data(self):
        """
        Test AssayDataParser.read_data.

        Expects four rows with the listed PUBCHEM_CID values, in order.
        """
        data = self.engine.read_data()
        assert data.shape[0] == 4  # CID 654924 is duplicated
        assert np.array_equal(data.PUBCHEM_CID, [645443, 645449, 654924, 2997889])

    def test_map_ids_to_smiles(self):
        """
        Test AssayDataParser.map_ids_to_smiles.

        Only two of the IDs read from the fixture are present in the map,
        so two SMILES are expected, matched to rows 0 and 3.
        """
        data = self.engine.read_data()
        id_map = read_pickle(self.map_filename)
        smiles, indices = self.engine.map_ids_to_smiles(data.PUBCHEM_CID, id_map)
        assert len(smiles) == len(indices) == 2
        assert smiles[0] == self.map["CID645443"]
        assert smiles[1] == self.map["CID2997889"]
        assert np.array_equal(indices, [0, 3])