if __name__ == '__main__':
    # Script entry point.
    #
    # Builds two Hub objects over the 'bcrp' dataset (one with lso=True, one
    # with lso=False), then, for every molecule id known to the lso=False hub
    # and for each of the two hubs, calls regress_the_loss with a
    # LinearRegression regressor and prints a small report:
    #   - one header line: molecule id, the hub's lso flag, and r2
    #   - one line per (infmolid, coeff) pair in most_influential
    #   - an 80-dash separator
    #
    # The print calls use a single pre-formatted string argument, so they
    # write the same text under Python 2 and are also valid Python 3.

    # Hand-picked molecule ids; not referenced by the code below.
    MOLIDS = [
        's=Matsson_2009__n=Bromosulfalein',
        's=Zembruski_2011__n=103268452',
        's=Patel_2011__n=19',
        's=Ochoa-Puentes_2011__n=131273183',
        's=Jin_2006__n=Ginsenoside Rg1',
        's=Matsson_2007__n=Timolol',
    ]

    hub_lso = Hub(dset_id='bcrp', lso=True, expids=range(40000))
    hub_csr = Hub(dset_id='bcrp', lso=False, expids=range(40000))

    # Molecule ids are taken from hub_csr only and visited in sorted order;
    # each id is paired with both hubs (lso hub first).
    # NOTE(review): `product` is presumably itertools.product - confirm
    # against the file's imports.
    for molid, hub in product(sorted(hub_csr.mols().molids()),
                              (hub_lso, hub_csr)):
        # Disabled alternatives left by the original author:
        # print molid, hub.lso
        # rfr_the_loss(hub, molid)

        # The first element of the returned 4-tuple rebinds `molid`; the
        # fourth element is ignored.
        molid, most_influential, r2, _ = regress_the_loss(
            hub, molid, regressor=LinearRegression())

        # '%s' applies str() to each value, matching what the Python 2
        # statement `print molid, hub.lso, r2` wrote.
        print('%s %s %s' % (molid, hub.lso, r2))
        for infmolid, coeff in most_influential:
            print('\t %.4f %s' % (coeff, infmolid))
        print('-' * 80)

    # Other molecule ids, with the original author's notes:
    # MOLID = 'CHEMBL1951453'  # hERG
    # MOLID = 'NOCAS_M43'  # mutagenicity, FAILS with BAD SMELL
    # MOLID = '74-83-9'  # mutagenicity
    # MOLID = 'Bromocriptine'  # pgp-cruciani, BSEP HIT!!!
    # MOLID = 'Succinylsulfathiazole'  # pgp-cruciani