def subject_regularize(rfcs, X_int, X_other, Y, oob=False,
                       regularize=(0.75, 0.3, 0.65)):
    """Shrink per-subject predictions toward the subject mean and score them.

    For each subject 1..49 the model ``rfcs[1][subject]`` supplies a
    prediction array.  The per-subject arrays are stacked along a third axis,
    and each one is blended with the across-subject mean before being scored
    against the stacked observations.  The across-subject mean and standard
    deviation are scored separately against ``Y['mean_std']``.

    Parameters
    ----------
    rfcs : indexable
        ``rfcs[1][subject]`` must be a fitted model exposing ``predict`` and,
        when ``oob`` is true, ``oob_prediction_``.
    X_int : array-like
        Features used to predict column 0 (ignored when ``oob`` is true).
    X_other : array-like
        Features used to predict every other column (ignored when ``oob`` is
        true).
    Y : mapping
        ``Y['subject'][subject]`` holds the observations for each subject and
        ``Y['mean_std']`` the target for the mean/std scoring.
    oob : bool
        Use each model's out-of-bag predictions instead of calling
        ``predict``.
    regularize : number or sequence of numbers
        Weight given to the across-subject mean for the 'int', 'ple' and
        'dec' scores respectively.  A bare number or a length-1 sequence is
        applied to all three.

    Returns
    -------
    tuple
        ``(r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean,
        r_int_std, r_ple_std, r_dec_std)``.
    """
    # Normalise `regularize` to three weights.  list() matters for a length-1
    # NumPy array, which `* 3` would otherwise scale instead of repeat.
    if np.isscalar(regularize):
        regularize = [regularize] * 3
    elif len(regularize) == 1:
        regularize = list(regularize) * 3
    w_int, w_ple, w_dec = regularize[0], regularize[1], regularize[2]

    observed_ = []
    predicted_ = []
    for subject in range(1, 50):
        observed = Y['subject'][subject]
        rfc = rfcs[1][subject]
        if oob:
            predicted = rfc.oob_prediction_
        else:
            predicted = rfc.predict(X_other)
            # Column 0 is predicted from its own feature matrix.
            predicted[:, 0] = rfc.predict(X_int)[:, 0]
        observed_.append(observed)
        predicted_.append(predicted)

    stacked = np.dstack(predicted_)
    observed = np.ma.dstack(observed_)
    predicted_mean = np.mean(stacked, axis=2, keepdims=True)
    predicted_std = np.std(stacked, axis=2, keepdims=True)
    predicted_mean_std = np.hstack((predicted_mean, predicted_std)).squeeze()

    def shrink(weight):
        # Convex blend of the subject mean and each subject's own prediction.
        return weight * predicted_mean + (1 - weight) * stacked

    r_int = scoring.r('int', shrink(w_int), observed)
    r_ple = scoring.r('ple', shrink(w_ple), observed)
    r_dec = scoring.r('dec', shrink(w_dec), observed)
    # Not used in the result; kept so the two scores can be compared while
    # debugging (print(score1_, score1)).
    score1_ = scoring.score(shrink(w_int), observed, n_subjects=49)
    score1 = scoring.rs2score(r_int, r_ple, r_dec)
    print("For subchallenge %d, score = %.3f (%.3f,%.3f,%.3f)"
          % (1, score1, r_int, r_ple, r_dec))

    target = Y['mean_std']
    score2 = scoring.score2(predicted_mean_std, target)
    kinds = ('int', 'ple', 'dec')
    r_int_mean, r_ple_mean, r_dec_mean = [
        scoring.r2(kind, 'mean', predicted_mean_std, target)
        for kind in kinds]
    # NOTE(review): other code in this file passes 'sigma' as the moment
    # name; confirm that scoring.r2 treats 'std' the same way.
    r_int_std, r_ple_std, r_dec_std = [
        scoring.r2(kind, 'std', predicted_mean_std, target)
        for kind in kinds]
    print("For subchallenge %d, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"
          % (2, score2, r_int_mean, r_ple_mean, r_dec_mean,
             r_int_std, r_ple_std, r_dec_std))

    return (r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean,
            r_int_std, r_ple_std, r_dec_std)
def rfc_(X_train, Y_train, X_test_int, X_test_other, Y_test,
         max_features=1500, n_estimators=1000, max_depth=None,
         min_samples_leaf=1):
    """Fit one random forest and report train (out-of-bag) and test scores.

    The train phase is scored on the forest's out-of-bag predictions.  The
    test phase is scored on predictions from ``X_test_other``, except that
    columns 0 and 21 are taken from predictions on ``X_test_int``.

    Returns
    -------
    tuple
        ``(rfc, train_scores, test_scores)`` where each scores tuple is
        ``(score, r_int, r_ple, r_dec, r_int_sig, r_ple_sig, r_dec_sig)``.
    """
    print(max_features)
    rfc = RandomForestRegressor(max_features=max_features,
                                n_estimators=n_estimators,
                                max_depth=max_depth,
                                min_samples_leaf=min_samples_leaf,
                                n_jobs=-1,
                                oob_score=True,
                                random_state=0)
    rfc.fit(X_train, Y_train)

    def evaluate(phase, predicted, observed):
        # Overall score first, then the mean correlations followed by the
        # sigma correlations, each in int/ple/dec order.
        total = scoring.score2(predicted, observed)
        correlations = tuple(
            scoring.r2(kind, moment, predicted, observed)
            for moment in ('mean', 'sigma')
            for kind in ('int', 'ple', 'dec'))
        result = (total,) + correlations
        print("For subchallenge 2, %s phase, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"
              % ((phase,) + result))
        return result

    train_scores = evaluate('train', rfc.oob_prediction_, Y_train)

    test_predicted = rfc.predict(X_test_other)
    from_int = rfc.predict(X_test_int)
    for column in (0, 21):
        test_predicted[:, column] = from_int[:, column]
    test_scores = evaluate('test', test_predicted, Y_test)

    return rfc, train_scores, test_scores
def subject_regularize(rfcs, X_int, X_other, Y, oob=False,
                       regularize=(0.75, 0.3, 0.65)):
    """Blend per-subject predictions with the subject mean and score them.

    Predictions from ``rfcs[1][subject]`` for subjects 1..49 are stacked
    along a third axis.  Each subject's prediction is then mixed with the
    across-subject mean, using one mixing weight per score kind, and scored
    against the stacked observations.  The across-subject mean and standard
    deviation are scored separately against ``Y['mean_std']``.

    Parameters
    ----------
    rfcs : indexable
        ``rfcs[1][subject]`` must be a fitted model exposing ``predict`` and,
        when ``oob`` is true, ``oob_prediction_``.
    X_int : array-like
        Features used to predict column 0 (ignored when ``oob`` is true).
    X_other : array-like
        Features used to predict every other column (ignored when ``oob`` is
        true).
    Y : mapping
        ``Y['subject'][subject]`` holds the observations for each subject and
        ``Y['mean_std']`` the target for the mean/sigma scoring.
    oob : bool
        Use each model's out-of-bag predictions instead of calling
        ``predict``.
    regularize : number or sequence of numbers
        Weight given to the across-subject mean for the 'int', 'ple' and
        'dec' scores respectively.  A bare number or a length-1 sequence is
        applied to all three.

    Returns
    -------
    tuple
        ``(r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean,
        r_int_sigma, r_ple_sigma, r_dec_sigma)``.
    """
    # Normalise `regularize` to three weights.  list() matters for a length-1
    # NumPy array, which `* 3` would otherwise scale instead of repeat.
    if np.isscalar(regularize):
        regularize = [regularize] * 3
    elif len(regularize) == 1:
        regularize = list(regularize) * 3
    w_int, w_ple, w_dec = regularize[0], regularize[1], regularize[2]

    observed_ = []
    predicted_ = []
    for subject in range(1, 50):
        observed = Y['subject'][subject]
        rfc = rfcs[1][subject]
        if oob:
            predicted = rfc.oob_prediction_
        else:
            predicted = rfc.predict(X_other)
            # Column 0 is predicted from its own feature matrix.
            predicted[:, 0] = rfc.predict(X_int)[:, 0]
        observed_.append(observed)
        predicted_.append(predicted)

    stacked = np.dstack(predicted_)
    observed = np.ma.dstack(observed_)
    predicted_mean = np.mean(stacked, axis=2, keepdims=True)
    predicted_std = np.std(stacked, axis=2, keepdims=True)
    predicted_mean_std = np.hstack((predicted_mean, predicted_std)).squeeze()

    def shrink(weight):
        # Convex blend of the subject mean and each subject's own prediction.
        return weight * predicted_mean + (1 - weight) * stacked

    r_int = scoring.r('int', shrink(w_int), observed)
    r_ple = scoring.r('ple', shrink(w_ple), observed)
    r_dec = scoring.r('dec', shrink(w_dec), observed)
    # Not used in the result; kept so the two scores can be compared while
    # debugging (print(score1_, score1)).
    score1_ = scoring.score(shrink(w_int), observed, n_subjects=49)
    score1 = scoring.rs2score(r_int, r_ple, r_dec)
    print("For subchallenge %d, score = %.3f (%.3f,%.3f,%.3f)"
          % (1, score1, r_int, r_ple, r_dec))

    target = Y['mean_std']
    score2 = scoring.score2(predicted_mean_std, target)
    kinds = ('int', 'ple', 'dec')
    r_int_mean, r_ple_mean, r_dec_mean = [
        scoring.r2(kind, 'mean', predicted_mean_std, target)
        for kind in kinds]
    r_int_sigma, r_ple_sigma, r_dec_sigma = [
        scoring.r2(kind, 'sigma', predicted_mean_std, target)
        for kind in kinds]
    print("For subchallenge %d, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"
          % (2, score2, r_int_mean, r_ple_mean, r_dec_mean,
             r_int_sigma, r_ple_sigma, r_dec_sigma))

    return (r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean,
            r_int_sigma, r_ple_sigma, r_dec_sigma)
def rfc_(X_train,Y_train,X_test_int,X_test_other,Y_test,
         max_features=1500,n_estimators=1000,max_depth=None,min_samples_leaf=1):
    """Train a single random forest and score it on train and test data.

    Train-phase scores come from the out-of-bag predictions.  Test-phase
    scores come from predictions on ``X_test_other`` with columns 0 and 21
    overwritten by the predictions made from ``X_test_int``.

    Returns ``(rfc, train_scores, test_scores)``; each scores entry is the
    tuple ``(score, r_int, r_ple, r_dec, r_int_sig, r_ple_sig, r_dec_sig)``.
    """
    print(max_features)

    forest_options = {
        'max_features': max_features,
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'min_samples_leaf': min_samples_leaf,
        'n_jobs': -1,
        'oob_score': True,
        'random_state': 0,
    }
    rfc = RandomForestRegressor(**forest_options)
    rfc.fit(X_train,Y_train)

    # (kind, moment) pairs in the order they appear in the result tuple.
    r2_specs = [('int','mean'),('ple','mean'),('dec','mean'),
                ('int','sigma'),('ple','sigma'),('dec','sigma')]
    report = "For subchallenge 2, %s phase, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"

    scores = {}
    for phase,observed in (('train',Y_train),('test',Y_test)):
        if phase != 'train':
            predicted = rfc.predict(X_test_other)
            int_based = rfc.predict(X_test_int)
            predicted[:,0] = int_based[:,0]
            predicted[:,21] = int_based[:,21]
        else:
            predicted = rfc.oob_prediction_

        outcome = [scoring.score2(predicted,observed)]
        for kind,moment in r2_specs:
            outcome.append(scoring.r2(kind,moment,predicted,observed))
        scores[phase] = tuple(outcome)
        print(report % ((phase,) + scores[phase]))

    return rfc,scores['train'],scores['test']
def rfc_final(X,Y_imp,Y_mask,
              max_features,min_samples_leaf,max_depth,et,use_mask,trans_weight,
              trans_params,X_test_int=None,X_test_other=None,Y_test=None,
              n_estimators=100,seed=0,quiet=False):
    """Fit one regressor per output column (42 in total) and score them.

    Parameters
    ----------
    X : array-like
        Training features shared by every column's model.
    Y_imp, Y_mask : 2-D arrays
        Two versions of the training targets; ``use_mask[col]`` selects
        ``Y_mask[:, col]`` (true) or ``Y_imp[:, col]`` (false) for column
        ``col``.
    max_features, min_samples_leaf, max_depth, et, use_mask : indexable
        Per-column settings, indexed by column 0..41.  ``et[col]`` selects
        ``ExtraTreesRegressor`` instead of ``RandomForestRegressor``.
    trans_weight, trans_params : indexable
        Per-column settings for columns 0..20: the blend weight and the
        ``(k0, k1)`` parameters of the transform applied to column ``col``
        when adjusting column ``col + 21``.
    X_test_int, X_test_other : array-like, optional
        Features used to predict columns 0 and 21, and all other columns,
        respectively.  Both default to ``X``.
    Y_test : array-like, optional
        Observations to score against.  Defaults to ``Y_mask``.
    n_estimators, seed : int
        Passed to every estimator (``seed`` as ``random_state``).
    quiet : bool
        Suppress the printed score summary.

    Returns
    -------
    tuple
        ``(rfcs, score, rs)``: the fitted models keyed by column, the
        ``scoring.score2`` result, and ``rs[kind][moment]`` correlations for
        kind in int/ple/dec and moment in mean/sigma.
    """
    if X_test_int is None:
        X_test_int = X
    if X_test_other is None:
        X_test_other = X
    if Y_test is None:
        Y_test = Y_mask

    n_cols = 42

    def make_model(col):
        # Build the (unfitted) estimator configured for one column.
        if et[col]:
            kls = ExtraTreesRegressor
            kwargs = {}
        else:
            kls = RandomForestRegressor
            kwargs = {'oob_score':False}
        return kls(n_estimators=n_estimators,
                   max_features=max_features[col],
                   min_samples_leaf=min_samples_leaf[col],
                   max_depth=max_depth[col],
                   n_jobs=-1,
                   random_state=seed,
                   **kwargs)

    def predict_column(col):
        # Columns 0 and 21 have their own test feature matrix.
        X_new = X_test_int if col in (0,21) else X_test_other
        return rfcs[col].predict(X_new)

    rfcs = {}
    for col in range(n_cols):
        prog(col,n_cols)
        rfcs[col] = make_model(col)
        targets = Y_mask if use_mask[col] else Y_imp
        rfcs[col].fit(X,targets[:,col])

    # Loop-invariant, so compare the (potentially large) matrices only once.
    out_of_sample = not np.array_equal(X,X_test_int)

    predicted = np.zeros((X_test_int.shape[0],n_cols))
    for col in range(n_cols):
        if et[col] or out_of_sample:
            # Possibly an in-sample fit, because there isn't any alternative.
            predicted[:,col] = predict_column(col)
            continue
        try:
            oob = rfcs[col].oob_prediction_
        except AttributeError:
            # The forests above are built with oob_score=False, so they are
            # not expected to expose oob_prediction_; fall back to predict.
            predicted[:,col] = predict_column(col)
        else:
            predicted[:,col] = oob

    def f_transform(x, k0, k1):
        return 100*(k0*(x/100)**(k1*0.5) - k0*(x/100)**(k1*2))

    # Blend each column in 21..41 with a transform of its partner in 0..20.
    for col in range(21):
        tw = trans_weight[col]
        k0,k1 = trans_params[col]
        p_m = predicted[:,col]
        p_s = predicted[:,col+21]
        predicted[:,col+21] = tw*f_transform(p_m,k0,k1) + (1-tw)*p_s

    observed = Y_test
    score = scoring.score2(predicted,observed)
    kinds = ['int','ple','dec']
    moments = ['mean','sigma']
    rs = {}
    for kind in kinds:
        rs[kind] = {}
        for moment in moments:
            rs[kind][moment] = scoring.r2(kind,moment,predicted,observed)

    if not quiet:
        print("For subchallenge 2:")
        print("\tScore = %.2f" % score)
        for kind in kinds:
            for moment in moments:
                print("\t%s_%s = %.3f" % (kind,moment,rs[kind][moment]))

    return (rfcs,score,rs)
def rfc_final(X,Y_imp,Y_mask,
              max_features,min_samples_leaf,max_depth,et,use_mask,
              Y_test=None,n_estimators=100,seed=0):
    """Fit one multi-output regressor per (kind, moment) pair and score them.

    Kinds are 'int', 'ple' and 'dec'; moments are 'mean' and 'sigma'.  The
    per-pair settings ``max_features``, ``min_samples_leaf``, ``max_depth``,
    ``et`` and ``use_mask`` are all indexed as ``setting[kind][moment]``.
    ``et`` selects ``ExtraTreesRegressor`` (scored on in-sample predictions)
    over ``RandomForestRegressor`` (scored on out-of-bag predictions), and
    ``use_mask`` selects ``Y_mask`` over ``Y_imp`` as the training target.

    The final prediction matrix takes column 0 (+21 for sigma) from the 'int'
    model, column 1 (+21) from the 'ple' model and columns 2..20 (+21) from
    the 'dec' model.  ``Y_test`` defaults to ``Y_mask``.

    Returns ``(rfcs, score, rs)`` where ``rfcs[kind][moment]`` are the fitted
    models and ``rs[kind][moment]`` the correlations, plus
    ``rs['int']['trans']``.
    """
    kinds = ('int','ple','dec')
    moments = ('mean','sigma')

    if Y_test is None:
        Y_test = Y_mask

    def build(kind,moment):
        # Both estimator classes share everything except oob_score.
        shared = dict(n_estimators=n_estimators,
                      max_features=max_features[kind][moment],
                      min_samples_leaf=min_samples_leaf[kind][moment],
                      max_depth=max_depth[kind][moment],
                      n_jobs=-1,
                      random_state=seed)
        if et[kind][moment]:
            return ExtraTreesRegressor(**shared)
        return RandomForestRegressor(oob_score=True,**shared)

    rfcs = {kind: {moment: build(kind,moment) for moment in moments}
            for kind in kinds}

    for kind in kinds:
        for moment in moments:
            targets = Y_mask if use_mask[kind][moment] else Y_imp
            rfcs[kind][moment].fit(X,targets)

    def predictions_for(kind,moment):
        model = rfcs[kind][moment]
        if et[kind][moment]:
            # Check in-sample fit because there isn't any alternative.
            return model.predict(X)
        return model.oob_prediction_

    predictions = {kind: {moment: predictions_for(kind,moment)
                          for moment in moments}
                   for kind in kinds}

    predicted = predictions['int']['mean'].copy()
    for i,moment in enumerate(moments):
        base = 21*i
        predicted[:,base] = predictions['int'][moment][:,base]
        predicted[:,base+1] = predictions['ple'][moment][:,base+1]
        predicted[:,base+2:base+21] = predictions['dec'][moment][:,base+2:base+21]

    observed = Y_test
    score = scoring.score2(predicted,observed)
    # Drop the references to the per-model prediction arrays.
    predictions = {}
    rs = {kind: {moment: scoring.r2(kind,moment,predicted,observed)
                 for moment in moments}
          for kind in kinds}
    rs['int']['trans'] = scoring.r2(None,None,f_int(predicted[:,0]),observed[:,0])

    print("For subchallenge 2:")
    print("\tScore = %.2f" % score)
    for kind in kinds:
        for moment in moments:
            print("\t%s_%s = %.3f" % (kind,moment,rs[kind][moment]))

    return (rfcs,score,rs)
def rfc_final(X, Y_imp, Y_mask, max_features, min_samples_leaf, max_depth,
              et, use_mask, Y_test=None, n_estimators=100, seed=0):
    """Train and score six multi-output models, one per (kind, moment).

    ``kind`` ranges over 'int', 'ple', 'dec' and ``moment`` over 'mean',
    'sigma'.  Every per-model setting (``max_features``,
    ``min_samples_leaf``, ``max_depth``, ``et``, ``use_mask``) is looked up
    as ``setting[kind][moment]``.  When ``et`` is true the model is an
    ``ExtraTreesRegressor`` evaluated on in-sample predictions; otherwise it
    is a ``RandomForestRegressor`` evaluated on out-of-bag predictions.
    Training targets are ``Y_mask`` when ``use_mask`` is true, else
    ``Y_imp``.  Scoring is against ``Y_test``, which defaults to ``Y_mask``.

    Returns ``(rfcs, score, rs)``: the fitted models as
    ``rfcs[kind][moment]``, the ``scoring.score2`` result, and the
    correlations ``rs[kind][moment]`` together with ``rs['int']['trans']``.
    """
    if Y_test is None:
        Y_test = Y_mask

    kinds = ['int', 'ple', 'dec']
    moments = ['mean', 'sigma']
    pairs = [(kind, moment) for kind in kinds for moment in moments]

    def new_model(kind, moment):
        if et[kind][moment]:
            return ExtraTreesRegressor(
                n_estimators=n_estimators,
                max_features=max_features[kind][moment],
                min_samples_leaf=min_samples_leaf[kind][moment],
                max_depth=max_depth[kind][moment],
                n_jobs=-1,
                random_state=seed)
        return RandomForestRegressor(
            n_estimators=n_estimators,
            max_features=max_features[kind][moment],
            min_samples_leaf=min_samples_leaf[kind][moment],
            max_depth=max_depth[kind][moment],
            oob_score=True,
            n_jobs=-1,
            random_state=seed)

    # Construct every model before fitting any of them.
    rfcs = {}
    for kind, moment in pairs:
        rfcs.setdefault(kind, {})[moment] = new_model(kind, moment)

    for kind, moment in pairs:
        if use_mask[kind][moment]:
            rfcs[kind][moment].fit(X, Y_mask)
        else:
            rfcs[kind][moment].fit(X, Y_imp)

    predictions = {}
    for kind, moment in pairs:
        model = rfcs[kind][moment]
        if not et[kind][moment]:
            output = model.oob_prediction_
        else:
            # Check in-sample fit because there isn't any alternative.
            output = model.predict(X)
        predictions.setdefault(kind, {})[moment] = output

    # Assemble one matrix: 'int' owns column 0, 'ple' column 1 and 'dec'
    # columns 2..20, with the sigma block offset by 21.
    predicted = predictions['int']['mean'].copy()
    for offset, moment in zip((0, 21), moments):
        dec_columns = slice(offset + 2, offset + 21)
        predicted[:, offset] = predictions['int'][moment][:, offset]
        predicted[:, offset + 1] = predictions['ple'][moment][:, offset + 1]
        predicted[:, dec_columns] = predictions['dec'][moment][:, dec_columns]

    observed = Y_test
    score = scoring.score2(predicted, observed)

    rs = {}
    predictions = {}
    for kind, moment in pairs:
        rs.setdefault(kind, {})[moment] = scoring.r2(kind, moment,
                                                     predicted, observed)
    rs['int']['trans'] = scoring.r2(None, None, f_int(predicted[:, 0]),
                                    observed[:, 0])

    print("For subchallenge 2:")
    print("\tScore = %.2f" % score)
    for kind, moment in pairs:
        print("\t%s_%s = %.3f" % (kind, moment, rs[kind][moment]))

    return (rfcs, score, rs)