def cv_Ridge_Asupervising(xM, yV, s_l, alpha):
    """
    Run shuffled 5-fold cross-validation of Ridge regression on a
    similarity matrix that is rebuilt for every fold.

    For each fold the rows of xM are reordered as [train rows, test rows],
    j3x.jpyx.calc_tm_sim_M is applied to the reordered matrix, and only the
    columns belonging to the training rows are kept, so that test samples
    are described solely through their relation to training samples.

    Parameters
    ----------
    xM : 2D array indexable as xM[rows, :]
        Descriptor matrix, one row per sample.
    yV : 2D array indexable as yV[rows, 0]
        Target values; only column 0 is used.
    s_l : list
        Not used by the computation; kept so existing callers that pass it
        keep working.
    alpha : float
        Regularization strength handed to linear_model.Ridge.

    Returns
    -------
    list
        The value of lr.score (R^2) on the held-out part of every fold.
    """
    lr = linear_model.Ridge(alpha=alpha)
    # model_selection.KFold takes n_splits; n_folds was the keyword of the
    # removed cross_validation.KFold and raises TypeError here.
    kf5 = model_selection.KFold(n_splits=5, shuffle=True).split(xM)
    r2_l = []

    for train, test in kf5:
        n_train = len(train)

        # Training rows first, test rows after, so the two groups can be
        # separated again by plain slicing below.
        xM_shuffle = np.concatenate((xM[train, :], xM[test, :]), axis=0)

        # presumably a Tanimoto similarity matrix (all samples x all
        # samples) -- TODO confirm against j3x.jpyx
        A_all = j3x.jpyx.calc_tm_sim_M(xM_shuffle)

        # Keep only the columns that refer to training samples.
        A = A_all[:, :n_train]
        A_train = A[:n_train, :]
        A_test = A[n_train:, :]

        lr.fit(A_train, yV[train, 0])
        r2_l.append(lr.score(A_test, yV[test, 0]))

    print('R^2 mean, std -->', np.mean(r2_l), np.std(r2_l))

    return r2_l
def grid_BIKE2(pdr, alphas_log, y_id='Solubility_log_mol_l'):
    """
    Grid-search the Ridge alpha on a similarity matrix extended with
    molecular weight, then cross-validate with the best alpha found.

    Parameters
    ----------
    pdr : object exposing a SMILES attribute with tolist()
        Data passed to the jpd.pd_get_* helpers (presumably a pandas
        DataFrame -- TODO confirm).
    alphas_log : passed through to jgrid.gs_Ridge as alphas_log.
    y_id : str
        Identifier of the target handed to jpd.pd_get_yV.

    Returns
    -------
    The grid-search object returned by jgrid.gs_Ridge.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print("BIKE with (A+B)+W")

    xM1 = jpd.pd_get_xM(pdr, radius=6, nBits=4096)
    xM2 = jpd.pd_get_xM_MACCSkeys(pdr)
    yV = jpd.pd_get_yV(pdr, y_id=y_id)

    # NOTE(review): xM (both fingerprints side by side) is built but never
    # used; the similarity matrix below is computed from xM1 only. The
    # banner "(A+B)+W" suggests the combined matrix may have been intended
    # -- confirm before changing, since it alters the model inputs.
    xM = np.concatenate((xM1, xM2), axis=1)
    A = jpyx.calc_tm_sim_M(xM1)
    print(A.shape)

    molw_l = jchem.rdkit_molwt(pdr.SMILES.tolist())
    print(np.shape(molw_l))
    A_molw = jchem.add_new_descriptor(A, molw_l)
    print(A_molw.shape)

    gs = jgrid.gs_Ridge(A_molw, yV, alphas_log=alphas_log)
    # NOTE(review): grid_scores_ no longer exists on recent scikit-learn
    # search objects (cv_results_ replaced it); verify what jgrid.gs_Ridge
    # returns and what jutil.show_gs_alpha expects.
    jutil.show_gs_alpha(gs.grid_scores_)
    jgrid.cv('Ridge', A_molw, yV, alpha=gs.best_params_['alpha'])

    return gs
def cv_Ridge_Asupervising_2fp(xM1, xM2, yV, s_l, alpha):
    """
    Run shuffled 5-fold cross-validation of Ridge regression on two
    per-fold similarity matrices placed side by side.

    For each fold the rows of xM1 and xM2 are reordered as
    [train rows, test rows], j3x.jpyx.calc_tm_sim_M is applied to each
    reordered matrix, and only the columns belonging to training rows are
    kept. The two reduced matrices are concatenated column-wise and used as
    the regression input.

    Parameters
    ----------
    xM1, xM2 : 2D arrays indexable as xM[rows, :]
        Two descriptor matrices with one row per sample, in the same order.
    yV : 2D array indexable as yV[rows, 0]
        Target values; only column 0 is used.
    s_l : list
        Per-sample inputs for jchem.rdkit_molwt (presumably SMILES strings
        -- TODO confirm). The resulting weights are only used for the
        shape printout; they are not part of the model input.
    alpha : float
        Regularization strength handed to linear_model.Ridge.

    Returns
    -------
    list
        The value of lr.score (R^2) on the held-out part of every fold.
    """
    lr = linear_model.Ridge(alpha=alpha)
    # model_selection.KFold takes n_splits and yields index pairs through
    # split(); the former KFold(len(s_l), n_folds=5) call is the removed
    # cross_validation API and raises TypeError.
    kf5 = model_selection.KFold(n_splits=5, shuffle=True).split(xM1)
    r2_l = []

    for train, test in kf5:
        n_train = len(train)

        # Training rows first, test rows after, so the two groups can be
        # separated again by plain slicing below.
        xM1_shuffle = np.concatenate((xM1[train, :], xM1[test, :]), axis=0)
        xM2_shuffle = np.concatenate((xM2[train, :], xM2[test, :]), axis=0)

        # Keep only the columns that refer to training samples.
        A1_redundant = j3x.jpyx.calc_tm_sim_M(xM1_shuffle)
        A1 = A1_redundant[:, :n_train]
        A2_redundant = j3x.jpyx.calc_tm_sim_M(xM2_shuffle)
        A2 = A2_redundant[:, :n_train]

        # Molecular weights in the same [train, test] order. They only feed
        # the diagnostic print below; the weight column is not appended to
        # the model input.
        s_l_shuffle = [s_l[x] for x in train]
        s_l_shuffle.extend(s_l[x] for x in test)
        molw_l = jchem.rdkit_molwt(s_l_shuffle)
        # np.asmatrix is what np.mat aliased; np.mat is gone in NumPy 2.0.
        molwV = np.asmatrix(molw_l).T

        print(A1.shape, A2.shape, molwV.shape)
        A_molw = np.concatenate((A1, A2), axis=1)
        print(A_molw.shape)

        A_molw_train = A_molw[:n_train, :]
        A_molw_test = A_molw[n_train:, :]

        lr.fit(A_molw_train, yV[train, 0])
        r2_l.append(lr.score(A_molw_test, yV[test, 0]))

    print('R^2 mean, std -->', np.mean(r2_l), np.std(r2_l))

    return r2_l