def setUp(self):
    """
    Set up tests.

    Creates a temporary directory, pickles a small ID -> SMILES map into
    it, and builds an AssayDataParser over the CSV file located at
    data/test_pcba_data.csv next to this file. The parser is stored as
    self.engine; the paths are stored as self.map_filename and
    self.data_filename.
    """
    self.temp_dir = tempfile.mkdtemp()
    self.map = {
        'CID645443': 'Cc1ccc(-n2c3c(cc(C(=O)Nc4cccc(C)n4)c2=O)C(=O)CCC3)cc1',
        'CID2997889': 'CC(C)(C)C(=O)Nc1ccc(-c2cn3ccsc3n2)cc1',
        'CID2244': 'CC(=O)Oc1ccccc1C(=O)O',
        'CID2662': 'Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1',
        'CID3672': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1'
    }

    # mkstemp returns an already-open OS-level file descriptor together
    # with the path. Only the path is used below, so close the descriptor
    # immediately instead of leaking one per test.
    fd, self.map_filename = tempfile.mkstemp(dir=self.temp_dir,
                                             suffix='.pkl')
    os.close(fd)
    write_pickle(self.map, self.map_filename)

    # use a subset of AID588342
    # note that CID 654924 is duplicated
    this_dir = os.path.split(os.path.realpath(__file__))[0]
    self.data_filename = os.path.join(this_dir, 'data/test_pcba_data.csv')

    # set up parser
    # settings match PcbaParser defaults
    self.engine = AssayDataParser(self.data_filename, self.map_filename,
                                  delimiter=',', primary_key='PUBCHEM_CID',
                                  activity_key='PUBCHEM_ACTIVITY_OUTCOME',
                                  activity_value='Active',
                                  id_prefix='CID')
def setUp(self):
    """
    Set up tests.

    Creates a temporary directory, pickles a small ID -> SMILES map into
    it, and builds an AssayDataParser over the CSV file located at
    data/test_pcba_data.csv next to this file. The parser is stored as
    self.engine; the paths are stored as self.map_filename and
    self.data_filename.
    """
    self.temp_dir = tempfile.mkdtemp()
    self.map = {
        "CID645443": "Cc1ccc(-n2c3c(cc(C(=O)Nc4cccc(C)n4)c2=O)C(=O)CCC3)cc1",
        "CID2997889": "CC(C)(C)C(=O)Nc1ccc(-c2cn3ccsc3n2)cc1",
        "CID2244": "CC(=O)Oc1ccccc1C(=O)O",
        "CID2662": "Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1",
        "CID3672": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
    }

    # mkstemp returns an already-open OS-level file descriptor together
    # with the path. Only the path is used below, so close the descriptor
    # immediately instead of leaking one per test.
    fd, self.map_filename = tempfile.mkstemp(dir=self.temp_dir, suffix=".pkl")
    os.close(fd)
    write_pickle(self.map, self.map_filename)

    # use a subset of AID588342
    # note that CID 654924 is duplicated
    this_dir = os.path.split(os.path.realpath(__file__))[0]
    self.data_filename = os.path.join(this_dir, "data/test_pcba_data.csv")

    # set up parser
    # settings match PcbaParser defaults
    self.engine = AssayDataParser(
        self.data_filename,
        self.map_filename,
        delimiter=",",
        primary_key="PUBCHEM_CID",
        activity_key="PUBCHEM_ACTIVITY_OUTCOME",
        activity_value="Active",
        id_prefix="CID",
    )
class TestAssayDataParser(unittest.TestCase):
    """
    Tests for AssayDataParser.
    """
    def setUp(self):
        """
        Set up tests.

        Creates a temporary directory, pickles a small ID -> SMILES map
        into it, and builds an AssayDataParser over the CSV file located at
        data/test_pcba_data.csv next to this file. The parser is stored as
        self.engine.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.map = {
            'CID645443': 'Cc1ccc(-n2c3c(cc(C(=O)Nc4cccc(C)n4)c2=O)C(=O)CCC3)cc1',
            'CID2997889': 'CC(C)(C)C(=O)Nc1ccc(-c2cn3ccsc3n2)cc1',
            'CID2244': 'CC(=O)Oc1ccccc1C(=O)O',
            'CID2662': 'Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1',
            'CID3672': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1'
        }

        # mkstemp returns an already-open OS-level file descriptor together
        # with the path. Only the path is used below, so close the
        # descriptor immediately instead of leaking one per test.
        fd, self.map_filename = tempfile.mkstemp(dir=self.temp_dir,
                                                 suffix='.pkl')
        os.close(fd)
        write_pickle(self.map, self.map_filename)

        # use a subset of AID588342
        # note that CID 654924 is duplicated
        this_dir = os.path.split(os.path.realpath(__file__))[0]
        self.data_filename = os.path.join(this_dir, 'data/test_pcba_data.csv')

        # set up parser
        # settings match PcbaParser defaults
        self.engine = AssayDataParser(self.data_filename, self.map_filename,
                                      delimiter=',', primary_key='PUBCHEM_CID',
                                      activity_key='PUBCHEM_ACTIVITY_OUTCOME',
                                      activity_value='Active',
                                      id_prefix='CID')

    def tearDown(self):
        """
        Clean up tests.

        Removes the temporary directory created in setUp, including the
        pickled map file inside it.
        """
        shutil.rmtree(self.temp_dir)

    def test_read_data(self):
        """
        Test AssayDataParser.read_data.

        Expects four rows with the PUBCHEM_CID values listed below.
        """
        data = self.engine.read_data()
        assert data.shape[0] == 4  # CID 654924 is duplicated
        assert np.array_equal(data.PUBCHEM_CID,
                              [645443, 645449, 654924, 2997889])

    def test_map_ids_to_smiles(self):
        """
        Test AssayDataParser.map_ids_to_smiles.

        Only two of the four CIDs expected from read_data (645443 and
        2997889) have entries in self.map, so two SMILES are expected, at
        row indices 0 and 3.
        """
        data = self.engine.read_data()
        id_map = read_pickle(self.map_filename)
        smiles, indices = self.engine.map_ids_to_smiles(
            data.PUBCHEM_CID, id_map)
        assert len(smiles) == len(indices) == 2
        assert smiles[0] == self.map['CID645443']
        assert smiles[1] == self.map['CID2997889']
        assert np.array_equal(indices, [0, 3])
class TestAssayDataParser(unittest.TestCase):
    """
    Tests for AssayDataParser.
    """

    def setUp(self):
        """
        Set up tests.

        Creates a temporary directory, pickles a small ID -> SMILES map
        into it, and builds an AssayDataParser over the CSV file located at
        data/test_pcba_data.csv next to this file. The parser is stored as
        self.engine.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.map = {
            "CID645443": "Cc1ccc(-n2c3c(cc(C(=O)Nc4cccc(C)n4)c2=O)C(=O)CCC3)cc1",
            "CID2997889": "CC(C)(C)C(=O)Nc1ccc(-c2cn3ccsc3n2)cc1",
            "CID2244": "CC(=O)Oc1ccccc1C(=O)O",
            "CID2662": "Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1",
            "CID3672": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
        }

        # mkstemp returns an already-open OS-level file descriptor together
        # with the path. Only the path is used below, so close the
        # descriptor immediately instead of leaking one per test.
        fd, self.map_filename = tempfile.mkstemp(dir=self.temp_dir, suffix=".pkl")
        os.close(fd)
        write_pickle(self.map, self.map_filename)

        # use a subset of AID588342
        # note that CID 654924 is duplicated
        this_dir = os.path.split(os.path.realpath(__file__))[0]
        self.data_filename = os.path.join(this_dir, "data/test_pcba_data.csv")

        # set up parser
        # settings match PcbaParser defaults
        self.engine = AssayDataParser(
            self.data_filename,
            self.map_filename,
            delimiter=",",
            primary_key="PUBCHEM_CID",
            activity_key="PUBCHEM_ACTIVITY_OUTCOME",
            activity_value="Active",
            id_prefix="CID",
        )

    def tearDown(self):
        """
        Clean up tests.

        Removes the temporary directory created in setUp, including the
        pickled map file inside it.
        """
        shutil.rmtree(self.temp_dir)

    def test_read_data(self):
        """
        Test AssayDataParser.read_data.

        Expects four rows with the PUBCHEM_CID values listed below.
        """
        data = self.engine.read_data()
        assert data.shape[0] == 4  # CID 654924 is duplicated
        assert np.array_equal(data.PUBCHEM_CID, [645443, 645449, 654924, 2997889])

    def test_map_ids_to_smiles(self):
        """
        Test AssayDataParser.map_ids_to_smiles.

        Only two of the four CIDs expected from read_data (645443 and
        2997889) have entries in self.map, so two SMILES are expected, at
        row indices 0 and 3.
        """
        data = self.engine.read_data()
        id_map = read_pickle(self.map_filename)
        smiles, indices = self.engine.map_ids_to_smiles(data.PUBCHEM_CID, id_map)
        assert len(smiles) == len(indices) == 2
        assert smiles[0] == self.map["CID645443"]
        assert smiles[1] == self.map["CID2997889"]
        assert np.array_equal(indices, [0, 3])