def lrp(self, R, *args, **kwargs):
    """Identity relevance pass-through for the softmax layer.

    Softmax acts component-wise for LRP purposes, so the relevance R is
    handed to the layer below unchanged; absorbing *args/**kwargs makes
    sure no LRP-variant subroutine is ever dispatched for this layer.
    The relevance map is dumped to disk before being returned.
    """
    # Archive the incoming relevance for offline inspection.
    data_io.write(R, '../r_array/softmax.npy')
    return R
def lrp(self, R, *args, **kwargs):
    """Route relevance back through the flatten layer.

    Restores R to the layer's recorded input shape and archives it to
    disk — a paired (flat, reshaped) 10x1 dump when the third axis is
    singleton, a plain dump otherwise — then returns the reshaped map.
    Absorbing *args/**kwargs keeps LRP-variant subroutines from running.
    """
    restored = np.reshape(R, self.inputshape)
    if restored.shape[2] == 1:
        # Stack the flat relevance next to its reshaped counterpart.
        paired = np.concatenate(
            (np.reshape(R, [10, 1]), np.reshape(restored, [10, 1])),
            axis=1)
        data_io.write(paired, '../r_array/flat_4.npy')
    else:
        data_io.write(restored, '../r_array/flatten.npy')
    return np.reshape(R, self.inputshape)
def _simple_lrp_slow(self, R):
    """Slow reference implementation of simple LRP for max-pooling.

    Redistributes the relevance R (shape N x Hout x Wout x D) onto the
    layer input self.X by splitting each output unit's relevance equally
    among all input positions that attained the pooling maximum.

    Returns
    -------
    Rx : ndarray, same shape as self.X — input-space relevance map.
    """
    N, H, W, D = self.X.shape
    hpool, wpool = self.pool
    hstride, wstride = self.stride

    # assume the given pooling and stride parameters are carefully chosen.
    Hout = (H - hpool) // hstride + 1
    Wout = (W - wpool) // wstride + 1

    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the identical dtype.
    Rx = np.zeros_like(self.X, dtype=float)
    for i in range(Hout):
        for j in range(Wout):
            # Boolean mask of the input positions equal to the pooled max.
            Z = self.Y[:, i:i + 1, j:j + 1, :] == self.X[
                :, i * hstride:i * hstride + hpool,
                j * wstride:j * wstride + wpool, :]
            # Count of (possibly multiple) argmax positions per window.
            Zs = Z.sum(axis=(1, 2), keepdims=True,
                       dtype=float)  # thanks user wodtko for reporting this bug/fix
            Rx[:, i * hstride:i * hstride + hpool,
               j * wstride:j * wstride + wpool, :] += (Z / Zs) * R[:, i:i + 1, j:j + 1, :]
            # NOTE(review): this per-window dump rewrites the whole Rx each
            # iteration — kept for behavioral compatibility.
            Rfile = ('../r_array/maxpool' + str(i) + '_Hout_' + str(j)
                     + '_Wout' + '.npy')
            data_io.write(Rx, Rfile)
    data_io.write(Rx, '../r_array/maxpool.npy')
    return Rx
def lrp(self, R, *args, **kwargs):
    """Tanh is component-wise, so relevance R propagates through unchanged.

    *args/**kwargs are swallowed so that no LRP-variant subroutine is
    ever invoked for this layer; R is archived to disk before returning.
    """
    data_io.write(R, '../r_array/tanh.npy')
    return R
def baseline(output_dir, basename, valid_num, test_num, target_num):
    """Emit all-zero baseline predictions for the valid and test splits.

    Writes '<basename>_valid_000.predict' and '<basename>_test_000.predict'
    into output_dir, each a zero matrix of shape (rows, target_num).
    """
    cycle = 0
    for split, rows in (('valid', valid_num), ('test', test_num)):
        zeros = np.zeros([rows, target_num])
        fname = basename + '_' + split + '_' + str(cycle).zfill(3) + '.predict'
        data_io.write(os.path.join(output_dir, fname), zeros)
def write_all_zeros(output_dir, basename, valid_num, test_num, target_num):
    """Fallback: write zero-filled predictions so scoring never sees a gap.

    Used when the real model breaks; produces '<basename>_valid_000.predict'
    and '<basename>_test_000.predict' filled with zeros.
    """
    cycle = 0
    tag = str(cycle).zfill(3)
    data_io.write(
        os.path.join(output_dir, basename + '_valid_' + tag + '.predict'),
        np.zeros([valid_num, target_num]))
    data_io.write(
        os.path.join(output_dir, basename + '_test_' + tag + '.predict'),
        np.zeros([test_num, target_num]))
def write_all_zeros(output_dir, basename, valid_num, test_num, target_num):
    """If something breaks, fall back to all-zero predictions.

    Writes one zero matrix per split ('valid'/'test') under output_dir
    using the standard '<basename>_<split>_000.predict' naming scheme.
    """
    suffix = str(0).zfill(3) + '.predict'
    for split, count in [('valid', valid_num), ('test', test_num)]:
        target = os.path.join(output_dir,
                              basename + '_' + split + '_' + suffix)
        data_io.write(target, np.zeros([count, target_num]))
def forward(self, X, *args, **kwargs):
    """ReLU forward pass: Y = max(0, X), with debug dumps of input/output.

    Saves the layer input and output under ../r_array/ with filenames
    encoding their shapes, then returns the rectified activations.
    """
    self.Y = np.maximum(0, X)

    # BUG FIX: the "Rect_input" dump previously saved self.Y (the output);
    # save X so the file named Rect_input really contains the layer input.
    input_check = '../r_array/Rect_input({},{},{}).npy'.format(
        X.shape[0], X.shape[1], X.shape[2])
    data_io.write(X, input_check)

    output_check = '../r_array/Rect_out({},{},{}).npy'.format(
        self.Y.shape[0], self.Y.shape[1], self.Y.shape[2])
    data_io.write(self.Y, output_check)
    return self.Y
def _epsilon_lrp(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140
    '''
    # Stabilized denominator: push each output away from zero by +/- epsilon.
    Zs = self.Y + epsilon * ((self.Y >= 0) * 2 - 1)

    # Has the forward pass been computed lrp-aware?
    # This exchanges time spent in the forward pass for lower LRP time
    # and is useful, if e.g. several parameter settings for LRP need to be
    # evaluated for the same input data.
    if self.lrp_aware:
        return (self.Z * (R / Zs)[:, na, :]).sum(axis=2)
    else:
        Z = self.W[na, :, :] * self.X[:, :, na]  # localized preactivations
        # PERF FIX: compute the back-propagated relevance once and reuse it
        # for both the dump and the return value (the original recomputed
        # the identical expression for the return).
        Zx = (Z * (R / Zs)[:, na, :]).sum(axis=2)
        data_io.write(Zx, '../r_array/linear.npy')
        return Zx
def predict(datanames, input_dir):
    """ Main function. """
    # NOTE(review): Python 2 syntax (print statements) — runs only under py2.
    overall_time_budget = 0
    res_dir = os.path.join(CONFIG['root_dir'], "res")
    for basename in datanames:
        print "\n*** Processing dataset %s" % basename.upper()
        start = time.time()
        # Load the raw dataset; cleaning/filtering is left to the AutoML run.
        D = DataManager(basename, input_dir, replace_missing=False,
                        filter_features=False, verbose=False)
        # Set overall time budget with this dataset's allocated time
        # (keep a 20% safety margin below the dataset's declared budget).
        time_budget = int(0.8 * D.info['time_budget'])
        overall_time_budget = overall_time_budget + time_budget
        read_time = time.time() - start
        ts = time.time()
        aml = AutoML(D, CONFIG)
        aml.run_predict(time_budget)
        run_time = time.time() - ts
        end = time.time()
        print "* Time:: budget=%5.2f, load=%5.2f, run=%5.2f, remaining=%5.2f" \
            % (time_budget, read_time, run_time, time_budget - (end - start))
        # Write one prediction file per AutoML cycle, for each split.
        for i, res in enumerate(aml._Y):
            filename = basename + "_valid_" + str(i).zfill(3) + ".predict"
            data_io.write(
                os.path.join(res_dir, filename), aml._Y[i]['Y_valid'])
            filename = basename + "_test_" + str(i).zfill(3) + ".predict"
            data_io.write(
                os.path.join(res_dir, filename), aml._Y[i]['Y_test'])
    return True
def lrp(self, R, *args, **kwargs):
    """ReLU is component-wise: relevance R passes through unchanged.

    Side effects only: dumps R — prefixed with an equal-sized zero block —
    to shape-specific files under ../r_array/ for offline inspection.
    Absorbing *args/**kwargs makes sure subroutines never get called.
    """
    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the identical dtype.
    Rx = np.zeros_like(R, dtype=float)
    Rsave = Rx
    # NOTE(review): the reshape targets below hard-code this network's
    # layer shapes — confirm against the architecture before reuse.
    if R.shape[2] == 2:
        Rsave3 = np.reshape(Rsave, [10, 40])
        Rlim3 = np.reshape(R, [10, 40])
        Rsave3 = np.concatenate((Rsave3, Rlim3), axis=1)
        data_io.write(Rsave3, '../r_array/rec_3.npy')
    elif R.shape[2] == 10:
        Rsave2 = np.reshape(Rsave, [10, 250])
        Rlim2 = np.reshape(R, [10, 250])
        Rsave2 = np.concatenate((Rsave2, Rlim2), axis=1)
        data_io.write(Rsave2, '../r_array/rec_2.npy')
    else:
        Rsave1 = np.reshape(Rsave, [28, 280])
        Rlim1 = np.reshape(R, [28, 280])
        Rsave1 = np.concatenate((Rsave1, Rlim1), axis=1)
        data_io.write(Rsave1, '../r_array/rec_1.npy')
    data_io.write(R, '../r_array/rect.npy')
    lrp_check = '../r_array/rect_lrp({},{},{}).npy'.format(
        R.shape[0], R.shape[1], R.shape[2])
    data_io.write(R, lrp_check)
    return R
print("Lrp R shape {} : ".format(Rinit.shape)) #compute first layer relevance according to prediction #R = nn.lrp(Rinit) #as Eq(56) from DOI: 10.1371/journal.pone.0130140 R = nn.lrp(Rinit,'epsilon',1.) R = R.sum(axis=3) xs = ((x+1.)/2.).sum(axis=3) if not np == numpy: xs = np.asnumpy(xs) R = np.asnumpy(R) digit = render.digit_to_rgb(xs, scaling = 3) hm = render.hm_to_rgb(R, X = xs, scaling = 3, sigma = 2) digit_hm = render.save_image([digit,hm],'../heatmap.png') data_io.write(R,'../heatmap.npy') data_io.write(xs,'../xs.npy') print(xs.shape) y = xs a = np.load('../r_array/convolution.npy') a = np.reshape(a,[a.shape[1]*a.shape[2],1]) b = np.load('../r_array/rect.npy') b = np.pad(b,((0,0),(2,2),(2,2),(0,0))) b = np.reshape(b,[b.shape[1]*b.shape[2],b.shape[0]*b.shape[3]]) c = np.load('../r_array/sumpoll.npy') c = np.pad(c,((0,0),(2,2),(2,2),(0,0))) c = np.reshape(c,[c.shape[1]*c.shape[2],c.shape[3]]) new_b = np.hstack((b, c)) new = np.hstack((a, new_b)) y = np.reshape(y, [y.shape[0]*y.shape[1]*y.shape[2]])
def predict (LD, output_dir, basename):
    """Train a per-dataset xgboost/sklearn model and write predictions.

    LD is a project DataManager-like object exposing .data and .info.
    The model and its hyper-parameters are hard-coded per dataset name
    (albert/dilbert/fabert/robert/volkert). Writes the standard
    '<basename>_valid_000.predict' / '<basename>_test_000.predict' files.
    """
    import copy
    import os
    import numpy as np
    import libscores
    import data_converter
    from sklearn import preprocessing, ensemble
    from sklearn.utils import shuffle
    # Shuffle once with a fixed seed so runs are reproducible.
    LD.data['X_train'], LD.data['Y_train'] = shuffle(LD.data['X_train'], LD.data['Y_train'] , random_state=1)
    Y_train = LD.data['Y_train']
    X_train = LD.data['X_train']
    Xta = np.copy(X_train)
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']
    Xtv = np.copy(X_valid)
    Xts = np.copy(X_test)
    import xgboost as xgb
    # Hyper-parameters below are hand-tuned per challenge dataset.
    if LD.info['name']== 'albert':
        model = xgb.XGBClassifier(max_depth=6, learning_rate=0.05, n_estimators=1800,
                                  silent=True, objective='binary:logistic', nthread=6,
                                  gamma=0.6, min_child_weight=0.7, max_delta_step=0,
                                  subsample=1, colsample_bytree=1, base_score=0.5,
                                  seed=0, missing=None)
    if LD.info['name']== 'dilbert':
        model = xgb.XGBClassifier(max_depth=4, learning_rate=0.1, n_estimators=1000,
                                  silent=True, objective='multi:softprob', nthread=-1,
                                  gamma=0, min_child_weight=0, max_delta_step=0,
                                  subsample=1, colsample_bytree=1, base_score=0.5,
                                  seed=0, missing=None)
    if LD.info['name']== 'fabert':
        model = xgb.XGBClassifier(max_depth=6, learning_rate=0.1, n_estimators=1200,
                                  silent=True, objective='multi:softprob', nthread=-1,
                                  gamma=0, min_child_weight=1, max_delta_step=0,
                                  subsample=1, colsample_bytree=1, base_score=0.5,
                                  seed=0, missing=None)
    if LD.info['name']== 'robert':
        model = xgb.XGBClassifier(max_depth=6, learning_rate=0.1, n_estimators=600,
                                  silent=True, objective='multi:softprob', nthread=-1,
                                  gamma=0, min_child_weight=1, max_delta_step=0,
                                  subsample=1, colsample_bytree=1, base_score=0.5,
                                  seed=0, missing=None)
    if LD.info['name']== 'volkert':
        from sklearn import ensemble, preprocessing
        p = preprocessing.PolynomialFeatures()
        prep = ensemble.RandomForestRegressor(n_estimators=24, n_jobs=-1, random_state=0, verbose=1)
        prep.fit(Xta,Y_train)
        # Keep only the 50 most important raw features before expansion.
        Xta = Xta [:, prep.feature_importances_.argsort()[-50:][::-1]]
        Xtv = Xtv [:, prep.feature_importances_.argsort()[-50:][::-1]]
        Xts = Xts [:, prep.feature_importances_.argsort()[-50:][::-1]]
        # Polynomial feature expansion, then a second importance cut to 800.
        # NOTE(review): fit_transform (not transform) is re-run on valid/test
        # — works here because PolynomialFeatures is stateless in its output
        # layout, but transform would be the conventional call.
        Xta = p.fit_transform(Xta)
        Xtv = p.fit_transform(Xtv)
        Xts = p.fit_transform(Xts)
        prep.fit(Xta,Y_train)
        Xta = Xta [:, prep.feature_importances_.argsort()[-800:][::-1]]
        Xtv = Xtv [:, prep.feature_importances_.argsort()[-800:][::-1]]
        Xts = Xts [:, prep.feature_importances_.argsort()[-800:][::-1]]
        # Append the engineered features to the originals.
        X_train = np.hstack([X_train, Xta])
        X_valid = np.hstack([X_valid, Xtv])
        X_test = np.hstack([X_test, Xts])
        model = xgb.XGBClassifier(max_depth=6, learning_rate=0.1, n_estimators=350,
                                  silent=True, objective='multi:softprob', nthread=-1,
                                  gamma=0, min_child_weight=1, max_delta_step=0,
                                  subsample=1, colsample_bytree=1, base_score=0.5,
                                  seed=0, missing=None)
    model.fit(X_train, Y_train)
    preds_valid = model.predict_proba(X_valid)
    preds_test = model.predict_proba(X_test)
    import data_io
    # Binary task: keep only the positive-class probability column.
    if LD.info['target_num'] == 1:
        preds_valid = preds_valid[:,1]
        preds_test = preds_test[:,1]
    preds_valid = np.clip(preds_valid,0,1)
    preds_test = np.clip(preds_test,0,1)
    data_io.write(os.path.join(output_dir, basename + '_valid_000.predict'), preds_valid)
    data_io.write(os.path.join(output_dir,basename + '_test_000.predict'), preds_test)
def predict (LD, output_dir, basename):
    """Train a two-hidden-layer lasagne/theano network and write predictions.

    Feature pipeline: append 27 PCA components to the raw features,
    standardize, cast to float32. Labels are one-hot encoded when
    possible. Writes '<basename>_<split>_000.predict' files via data_io.
    """
    import os
    import numpy as np
    import random
    import data_converter
    from sklearn import preprocessing, decomposition
    from sklearn.utils import shuffle
    import time
    from sklearn.externals import joblib
    from lasagne import layers
    from lasagne.updates import nesterov_momentum
    from lasagne.updates import norm_constraint
    import lasagne
    import theano
    import theano.tensor as T
    from lasagne.regularization import regularize_layer_params, regularize_layer_params_weighted, l2, l1
    from lasagne.updates import norm_constraint, total_norm_constraint
    # Fix all RNG seeds for reproducibility.
    np.random.seed(0)
    random.seed(0)
    LD.data['X_train'], LD.data['Y_train'] = shuffle(LD.data['X_train'], LD.data['Y_train'] , random_state=1)
    X_train = LD.data['X_train']
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']
    # Augment the raw features with their first 27 principal components.
    fs = decomposition.PCA(n_components=27)
    fs.fit(X_train)
    X_train2 = fs.transform(X_train)
    X_valid2 = fs.transform(X_valid)
    X_test2 = fs.transform(X_test)
    X_train = np.hstack([X_train, X_train2])
    X_valid = np.hstack([X_valid, X_valid2])
    X_test = np.hstack([X_test, X_test2])
    # Standardize using statistics fit on the training split only.
    normx = preprocessing.StandardScaler(with_mean=True)
    normx.fit(X_train)
    X_train = normx.transform(X_train)
    X_valid = normx.transform(X_valid)
    X_test = normx.transform(X_test)
    X_train = np.float32(X_train)
    X_valid = np.float32(X_valid)
    X_test = np.float32(X_test)
    # One-hot encode labels; fall back to raw labels if encoding fails.
    try:
        y_train = np.array(data_converter.convert_to_bin(LD.data['Y_train'], len(np.unique(LD.data['Y_train'])), False))
        y_train = np.int16(y_train)
    except:
        y_train = np.copy(LD.data['Y_train'])

    def batches(X, y, csize, rs):
        # Yield shuffled minibatches of size csize (remainder is dropped).
        X, y = shuffle(X, y, random_state=rs)
        for cstart in range(0, len(X) - csize+1, csize):
            Xc = X[cstart:cstart+csize]
            yc = y[cstart:cstart+csize]
            yield Xc, yc

    input_var = T.matrix('inputs')
    target_var = T.matrix('targets')
    l_in = lasagne.layers.InputLayer(shape=(None, X_train.shape[1]), input_var=input_var, nonlinearity=None,)
    l_hid1 = lasagne.layers.DenseLayer(
        l_in, num_units= 500,
        nonlinearity=lasagne.nonlinearities.sigmoid,
        W=lasagne.init.Sparse() )
    l_hid2 = lasagne.layers.DenseLayer(
        l_hid1, num_units= 500,
        nonlinearity=lasagne.nonlinearities.sigmoid,
        W=lasagne.init.GlorotUniform())
    # NOTE(review): output width is hard-coded to 100 units — presumably
    # matches this dataset's class count; verify against LD.info.
    Lnum_out_units = 100
    l_out = lasagne.layers.DenseLayer(
        l_hid2, num_units=Lnum_out_units,
        nonlinearity=lasagne.nonlinearities.softmax)
    network = l_out
    prediction = lasagne.layers.get_output(network)
    # Combined objective: 2 x cross-entropy + multiclass hinge loss.
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    loss2 = lasagne.objectives.multiclass_hinge_loss(prediction, target_var, delta=0.6)
    loss2 = loss2.mean()
    loss = loss*2 + loss2
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=0.0002, beta1=0.95, beta2=0.999 )
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    # Train 150 epochs; batch size grows slowly with the epoch index, and
    # the epoch index doubles as the shuffle seed.
    for epoch in range(150):
        train_err = 0
        train_batches = 0
        for batch in batches(X_train, y_train, 60 + int(epoch/10), epoch):
            Xt, yt = batch
            train_err += train_fn(Xt, yt)
            train_batches += 1
    # Deterministic forward pass for inference.
    xml1 = T.matrix('xml1')
    Xlt1 = lasagne.layers.get_output(l_out, xml1, deterministic=True)
    f2 = theano.function([xml1], Xlt1)
    preds_valid = f2(X_valid)
    preds_test = f2(X_test)
    import data_io
    # Binary task: keep only the positive-class probability column.
    if LD.info['target_num'] == 1:
        preds_valid = preds_valid[:,1]
        preds_test = preds_test[:,1]
    preds_valid = np.clip(preds_valid,0,1)
    preds_test = np.clip(preds_test,0,1)
    cycle = 0
    filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir,filename_valid), preds_valid)
    filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir,filename_test), preds_test)
def predict(LD, output_dir, basename):
    """Train per-dataset ensembles and write valid/test predictions.

    LD is a project DataManager-like object exposing .data and .info.
    Model choice is hard-coded per dataset name (alexis/dionis/grigoris/
    jannis/wallis). Writes '<basename>_valid_000.predict' and
    '<basename>_test_000.predict' via data_io.
    """
    import copy
    import os
    import numpy as np
    import libscores
    import data_converter
    from sklearn import preprocessing, ensemble
    from sklearn.utils import shuffle
    # NOTE(review): linear_model and naive_bayes are referenced below but
    # not imported here — presumably imported at module level; verify.
    # Shuffle once with a fixed seed so runs are reproducible.
    LD.data['X_train'], LD.data['Y_train'] = shuffle(LD.data['X_train'], LD.data['Y_train'], random_state=1)
    Y_train = LD.data['Y_train']
    X_train = LD.data['X_train']
    Xta = np.copy(X_train)
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']
    Xtv = np.copy(X_valid)
    Xts = np.copy(X_test)
    import xgboost as xgb
    if LD.info['name'] == 'alexis':
        # Two random forests differing only in seed, averaged.
        model = ensemble.RandomForestClassifier(max_depth=140, n_estimators=1800,
                                                n_jobs=-1, random_state=0,
                                                verbose=0, warm_start=True)
        model2 = ensemble.RandomForestClassifier(max_depth=140, n_estimators=1800,
                                                 n_jobs=-1, random_state=1,
                                                 verbose=0, warm_start=True)
        model.fit(X_train, Y_train)
        model2.fit(X_train, Y_train)
        preds_valid0 = model.predict_proba(X_valid)
        preds_test0 = model.predict_proba(X_test)
        preds_valid2 = model2.predict_proba(X_valid)
        preds_test2 = model2.predict_proba(X_test)
        preds_valid0 = np.array(preds_valid0)
        preds_valid2 = np.array(preds_valid2)
        preds_test0 = np.array(preds_test0)
        preds_test2 = np.array(preds_test2)
        preds_valid = (preds_valid0 + preds_valid2) / 2
        preds_test = (preds_test0 + preds_test2) / 2
        # Multilabel: take the positive-class slice per label, transpose
        # to (samples, labels).
        preds_valid = preds_valid[:, :, 1]
        preds_valid = preds_valid.T
        preds_test = preds_test[:, :, 1]
        preds_test = preds_test.T
    if LD.info['name'] == 'dionis':
        Lest = 600
        #600 will consume cca 250 GB of RAM, use 50 for similar result
        #Lest = 50
        # Five forests seeded 0..4; probabilities are summed.
        model = ensemble.RandomForestClassifier(n_jobs=-1, n_estimators=Lest, random_state=0)
        model.fit(X_train, Y_train)
        preds_valid0 = model.predict_proba(X_valid)
        preds_test0 = model.predict_proba(X_test)
        model = ensemble.RandomForestClassifier(n_jobs=-1, n_estimators=Lest, random_state=1)
        model.fit(X_train, Y_train)
        preds_valid1 = model.predict_proba(X_valid)
        preds_test1 = model.predict_proba(X_test)
        model = ensemble.RandomForestClassifier(n_jobs=-1, n_estimators=Lest, random_state=2)
        model.fit(X_train, Y_train)
        preds_valid2 = model.predict_proba(X_valid)
        preds_test2 = model.predict_proba(X_test)
        model = ensemble.RandomForestClassifier(n_jobs=-1, n_estimators=Lest, random_state=3)
        model.fit(X_train, Y_train)
        preds_valid3 = model.predict_proba(X_valid)
        preds_test3 = model.predict_proba(X_test)
        model = ensemble.RandomForestClassifier(n_jobs=-1, n_estimators=Lest, random_state=4)
        model.fit(X_train, Y_train)
        preds_valid4 = model.predict_proba(X_valid)
        preds_test4 = model.predict_proba(X_test)
        # NOTE(review): the author acknowledges the missing /5 below but
        # left it in — kept for reproducibility of the original result.
        preds_valid = (preds_valid0 + preds_valid1 + preds_valid2 +
                       preds_valid3 + preds_valid4 )  # /5 should be included (bug)
        preds_test = (preds_test0 + preds_test1 + preds_test2 +
                      preds_test3 + preds_test4)  # /5 should be included (bug)
    if LD.info['name'] == 'grigoris':
        # One-vs-rest: three forests + one L1 logistic regression per label.
        model = ensemble.RandomForestClassifier(criterion='entropy', max_features=0.05,
                                                max_depth=5, n_estimators=120,
                                                n_jobs=-1, random_state=0, verbose=0)
        model2 = linear_model.LogisticRegression(penalty='l1', random_state=1, n_jobs=-1, C=0.008)
        model3 = ensemble.RandomForestClassifier(criterion='entropy', max_features=0.05,
                                                 max_depth=5, n_estimators=120,
                                                 n_jobs=-1, random_state=1, verbose=0)
        model4 = ensemble.RandomForestClassifier(criterion='entropy', max_features=0.05,
                                                 max_depth=5, n_estimators=120,
                                                 n_jobs=-1, random_state=2, verbose=0)
        preds_valid = np.zeros((X_valid.shape[0], Y_train.shape[1]))
        preds_test = np.zeros((X_test.shape[0], Y_train.shape[1]))
        for pyt in range(Y_train.shape[1]):
            # NOTE(review): Python 2 print statement.
            print pyt
            ytp = Y_train[:, pyt]
            model.fit(X_train, ytp)
            model2.fit(X_train, ytp)
            model3.fit(X_train, ytp)
            model4.fit(X_train, ytp)
            preds1v = model.predict_proba(X_valid)[:, 1]
            preds2v = model2.predict_proba(X_valid)[:, 1]
            preds3v = model3.predict_proba(X_valid)[:, 1]
            preds4v = model4.predict_proba(X_valid)[:, 1]
            predsv = (preds1v + preds2v + preds3v + preds4v) / 4
            preds_valid[:, pyt] = predsv
            preds1t = model.predict_proba(X_test)[:, 1]
            preds2t = model2.predict_proba(X_test)[:, 1]
            preds3t = model3.predict_proba(X_test)[:, 1]
            preds4t = model4.predict_proba(X_test)[:, 1]
            predst = (preds1t + preds2t + preds3t + preds4t) / 4
            preds_test[:, pyt] = predst
    if LD.info['name'] == 'jannis':
        # Oversample the rare classes: class 0 duplicated 18x, class 2 once.
        Xd = X_train[Y_train == 0]
        yd = Y_train[Y_train == 0]
        for a in range(18):
            X_train = np.vstack([X_train, Xd])
            Y_train = np.hstack([Y_train, yd])
        Xd = X_train[Y_train == 2]
        yd = Y_train[Y_train == 2]
        X_train = np.vstack([X_train, Xd])
        Y_train = np.hstack([Y_train, yd])
        Y_train_raw = np.array(
            data_converter.convert_to_bin(Y_train, len(np.unique(Y_train)), False))
        preds_valid = np.zeros((X_valid.shape[0], Y_train_raw.shape[1]))
        preds_test = np.zeros((X_test.shape[0], Y_train_raw.shape[1]))
        # One binary xgboost model per one-hot label column.
        for pyt in range(Y_train_raw.shape[1]):
            if pyt == 0:
                Lbs = 0.2
            else:
                Lbs = 0.5
            model = xgb.XGBClassifier(max_depth=30, learning_rate=0.05, n_estimators=100,
                                      silent=True, objective='binary:logistic', nthread=-1,
                                      gamma=0, min_child_weight=80, max_delta_step=1,
                                      subsample=1, colsample_bytree=1, base_score=Lbs,
                                      seed=0, missing=None)
            ytp = Y_train_raw[:, pyt]
            model.fit(X_train, ytp)
            preds1v = model.predict_proba(X_valid)[:, 1]
            preds_valid[:, pyt] = preds1v
            preds1t = model.predict_proba(X_test)[:, 1]
            preds_test[:, pyt] = preds1t
    if LD.info['name'] == 'wallis':
        # Naive Bayes averaged with a softprob xgboost model.
        model = naive_bayes.MultinomialNB(alpha=0.02)
        model2 = xgb.XGBClassifier(max_depth=5, learning_rate=0.05, n_estimators=1200,
                                   silent=True, objective='multi:softprob', nthread=-1,
                                   gamma=0, min_child_weight=1, max_delta_step=0,
                                   subsample=1, colsample_bytree=1, base_score=0.5,
                                   seed=0, missing=None)
        model.fit(X_train, Y_train)
        preds_valid1 = model.predict_proba(X_valid)
        preds_test1 = model.predict_proba(X_test)
        model2.fit(X_train, Y_train)
        preds_valid2 = model2.predict_proba(X_valid)
        preds_test2 = model2.predict_proba(X_test)
        preds_valid = (preds_valid1 + preds_valid2) / 2
        preds_test = (preds_test1 + preds_test2) / 2
    import data_io
    # Binary task: keep only the positive-class probability column.
    if LD.info['target_num'] == 1:
        preds_valid = preds_valid[:, 1]
        preds_test = preds_test[:, 1]
    preds_valid = np.clip(preds_valid, 0, 1)
    preds_test = np.clip(preds_test, 0, 1)
    data_io.write(os.path.join(output_dir, basename + '_valid_000.predict'), preds_valid)
    data_io.write(os.path.join(output_dir, basename + '_test_000.predict'), preds_test)
# Restore the trained model and render LRP heatmaps for 12 test digits.
with tf.Session() as sess:
    sess.run(init)
    saver.restore(sess, model_path)
    for inx in I[:12]:
        test_x = mnist.test.images[inx]
        # Rescale pixel values from [0,1] to [-1,1] to match training input.
        test_x = (test_x - 0.5) * 2
        test_y = mnist.test.labels[inx]
        # R and pred are graph tensors defined earlier (relevance / logits).
        relevance = sess.run(R, feed_dict={ x: test_x[np.newaxis, :] })
        # import pdb; pdb.set_trace()
        pred_y = sess.run(pred, feed_dict={ x: test_x[np.newaxis, :] })
        # Render the digit and its relevance heatmap side by side.
        digit = render.digit_to_rgb(test_x, scaling = 3)
        hm = render.hm_to_rgb(relevance, X = test_x, scaling = 3, sigma = 2)
        digit_hm = render.save_image([digit,hm],'./heatmap.png')
        data_io.write(relevance,'./heatmap.npy')
        print ('True Class: {}'.format(np.argmax(test_y)))
        print ('Predicted Class: {}\n'.format(np.argmax(pred_y)))
        #display the image as written to file
        plt.imshow(digit_hm, interpolation = 'none', cmap=plt.cm.binary)
        plt.axis('off')
        plt.show()
def ingestion_fn(dataset_dir,
                 code_dir,
                 time_budget,
                 time_budget_approx,
                 output_dir,
                 score_dir,
                 model_config_name=None,
                 model_config=None):
    """Run the AutoDL ingestion process end to end.

    Loads the single dataset under `dataset_dir`, instantiates the
    participant's Model from `code_dir`, alternates train/test rounds
    until the model reports done training or the budget runs out, writes
    prediction files plus start/end marker files into `output_dir`, and
    mirrors the output into `score_dir`.

    Raises ValueError when dataset_dir does not contain exactly one
    dataset; model-API and prediction-shape errors are caught and only
    recorded in end.txt as ingestion_success = 0.
    """
    #### Check whether everything went well
    ingestion_success = True

    # Parse directories
    root_dir = _HERE(os.pardir)
    ingestion_program_dir = join(root_dir, "ingestion_program")

    if dataset_dir.endswith("run/input") and code_dir.endswith("run/program"):
        logger.debug(
            "Since dataset_dir ends with 'run/input' and code_dir "
            "ends with 'run/program', suppose running on " +
            "CodaLab platform. Modify dataset_dir to 'run/input_data' "
            "and code_dir to 'run/submission'. " +
            "Directory parsing should be more flexible in the code of " +
            "compute worker: we need explicit directories for " +
            "dataset_dir and code_dir.")
        dataset_dir = dataset_dir.replace("run/input", "run/input_data")
        code_dir = code_dir.replace("run/program", "run/submission")

    # Show directories for debugging
    logger.debug("sys.argv = " + str(sys.argv))
    logger.debug("Using dataset_dir: " + dataset_dir)
    logger.debug("Using output_dir: " + output_dir)
    logger.debug("Using ingestion_program_dir: " + ingestion_program_dir)
    logger.debug("Using code_dir: " + code_dir)

    # Our libraries
    path.append(ingestion_program_dir)
    path.append(code_dir)
    # IG: to allow submitting the starting kit as sample submission
    path.append(code_dir + "/sample_code_submission")
    import data_io
    from dataset import AutoDLDataset  # THE class of AutoDL datasets

    data_io.mkdir(output_dir)

    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(dataset_dir)
    #### Delete zip files and metadata file
    datanames = [x for x in datanames if x.endswith(".data")]

    if len(datanames) != 1:
        raise ValueError("{} datasets found in dataset_dir={}!\n".format(
            len(datanames), dataset_dir) +
                         "Please put only ONE dataset under dataset_dir.")

    basename = datanames[0]

    logger.info("************************************************")
    logger.info("******** Processing dataset " + basename[:-5].capitalize() +
                " ********")
    logger.info("************************************************")
    logger.debug("Version: {}. Description: {}".format(VERSION, DESCRIPTION))

    ##### Begin creating training set and test set #####
    logger.info("Reading training set and test set...")
    D_train = AutoDLDataset(os.path.join(dataset_dir, basename, "train"))
    D_test = AutoDLDataset(os.path.join(dataset_dir, basename, "test"))
    ##### End creating training set and test set #####

    ## Get correct prediction shape
    num_examples_test = D_test.get_metadata().size()
    output_dim = D_test.get_metadata().get_output_size()
    correct_prediction_shape = (num_examples_test, output_dim)

    ##### Begin creating model #####
    logger.info("Creating model...this process should not exceed 20min.")
    from model import Model  # in participants' model.py
    # The metadata of D_train and D_test only differ in sample_count
    M = Model(D_train.get_metadata(),
              model_config_name=model_config_name,
              model_config=model_config)
    ###### End creating model ######

    # Mark starting time of ingestion
    start = time.time()
    logger.info("=" * 5 + " Start core part of ingestion program. " +
                "Version: {} ".format(VERSION) + "=" * 5)

    write_start_file(output_dir,
                     start_time=start,
                     time_budget=time_budget,
                     task_name=basename.split(".")[0])

    try:
        # Check if the model has methods `train` and `test`.
        for attr in ["train", "test"]:
            if not hasattr(M, attr):
                # BUG FIX: `attr` was never interpolated into the message,
                # so the offending method name was printed literally as '{}'.
                raise ModelApiError(
                    "Your model object doesn't have the method " +
                    "`{}`. Please implement it in model.py.".format(attr))

        # Check if model.py uses new done_training API instead of marking
        # stopping by returning None
        use_done_training_api = hasattr(M, "done_training")
        if not use_done_training_api:
            logger.warning(
                "Your model object doesn't have an attribute " +
                "`done_training`. But this is necessary for ingestion " +
                "program to know whether the model has done training " +
                "and to decide whether to proceed more training. " +
                "Please add this attribute to your model.")

        # Keeping track of how many predictions are made
        prediction_order_number = 0

        # Start the CORE PART: train/predict process
        while not (use_done_training_api and M.done_training):
            remaining_time_budget = start + time_budget - time.time()
            # Train the model
            logger.info("Begin training the model...")
            M.train(D_train.get_dataset(),
                    remaining_time_budget=remaining_time_budget)
            logger.info("Finished training the model.")

            # Make predictions using the trained model
            logger.info("Begin testing the model by making predictions " +
                        "on test set...")
            remaining_time_budget = start + time_budget - time.time()
            Y_pred = M.test(D_test.get_dataset(),
                            remaining_time_budget=remaining_time_budget)
            logger.info("Finished making predictions.")

            if Y_pred is None:  # Stop train/predict process if Y_pred is None
                logger.info("The method model.test returned `None`. " +
                            "Stop train/predict process.")
                break
            else:  # Check if the prediction has good shape
                prediction_shape = tuple(Y_pred.shape)
                if prediction_shape != correct_prediction_shape:
                    raise BadPredictionShapeError(
                        "Bad prediction shape! Expected {} but got {}.".format(
                            correct_prediction_shape, prediction_shape))

            # Stop when the (approximate) budget is exhausted.
            remaining_time_budget = start + time_budget_approx - time.time()
            if remaining_time_budget < 0:
                break

            # Write timestamp to 'start.txt'
            write_timestamp(output_dir,
                            predict_idx=prediction_order_number,
                            timestamp=time.time())

            # Prediction files: adult.predict_0, adult.predict_1, ...
            filename_test = basename[:-5] + ".predict_" + str(
                prediction_order_number)
            # Write predictions to output_dir
            data_io.write(os.path.join(output_dir, filename_test), Y_pred)
            prediction_order_number += 1
            logger.info(
                "[+] {0:d} predictions made, time spent so far {1:.2f} sec".
                format(prediction_order_number, time.time() - start))
            remaining_time_budget = start + time_budget_approx - time.time()
            logger.info(
                "[+] Time left {0:.2f} sec".format(remaining_time_budget))

    except Exception as e:
        ingestion_success = False
        logger.info("Failed to run ingestion.")
        logger.error("Encountered exception:\n" + str(e), exc_info=True)

    # Finishing ingestion program
    end_time = time.time()
    overall_time_spent = end_time - start

    # Write overall_time_spent to a end.txt file
    end_filename = "end.txt"
    with open(os.path.join(output_dir, end_filename), "w") as f:
        f.write("ingestion_duration: " + str(overall_time_spent) + "\n")
        f.write("ingestion_success: " + str(int(ingestion_success)) + "\n")
        f.write("end_time: " + str(end_time) + "\n")
        logger.info("Wrote the file {} marking the end of ingestion.".format(
            end_filename))

        if ingestion_success:
            logger.info("[+] Done. Ingestion program successfully terminated.")
            logger.info("[+] Overall time spent %5.2f sec " %
                        overall_time_spent)
        else:
            logger.info(
                "[-] Done, but encountered some errors during ingestion.")
            logger.info("[-] Overall time spent %5.2f sec " %
                        overall_time_spent)

    # Copy all files in output_dir to score_dir
    os.system("cp -R {} {}".format(os.path.join(output_dir, "*"), score_dir))
    logger.debug("Copied all ingestion output to scoring output directory.")

    logger.info("[Ingestion terminated]")
def echo_nest_analysis(fname_song, fname_config=None):
    """
    Get track details via Echo Nest API.

    Results are cached under '<config dir>/Audio Analysis/<song>.full.yml';
    a cached analysis is returned without contacting the API. New songs are
    uploaded first and their track id recorded back into the config file.
    """
    if not fname_config:
        fname_config = 'audio_config.yml'
    fname_config = os.path.abspath(fname_config)

    path_work = os.path.dirname(fname_config)
    path_analysis = os.path.join(path_work, 'Audio Analysis')
    if not os.path.isdir(path_analysis):
        os.mkdir(path_analysis)

    fname_song = os.path.basename(fname_song)
    b, e = os.path.splitext(fname_song)
    #if not (e == '.mp3' or e == '.m4a'):
    #    fname_song = b + '.mp3'
    fname_analysis = b + '.full.yml'

    f = os.path.join(path_analysis, fname_analysis)
    if os.path.isfile(f):
        print('Load existing analysis')
        analysis, meta = data_io.read(f)
    else:
        # Read config.
        info, meta = data_io.read(fname_config)

        # BUG FIX: test membership BEFORE indexing — the original evaluated
        # info['songs'] first, raising KeyError for configs without a
        # 'songs' entry.
        if 'songs' not in info or not info['songs']:
            info['songs'] = {}

        # Configure Echo Nest API key.
        pyechonest.config.ECHO_NEST_API_KEY = info['api_key']

        # Load track details.
        if fname_song not in info['songs']:
            print('Upload new song to Echo Nest: %s' % fname_song)
            info['songs'][fname_song] = {}
            track = pyechonest.track.track_from_filename(fname_song)
            info['songs'][fname_song]['id'] = track.id
            info['songs'][fname_song]['analysis_url'] = track.analysis_url
            # Save updated config.
            data_io.write(fname_config, info)
        else:
            print('Download song analysis from Echo Nest: %s' % fname_song)
            track = pyechonest.track.track_from_id(
                info['songs'][fname_song]['id'])

        print('Retrieve full analysis from url')
        r = requests.get(track.analysis_url)
        analysis = r.json()

        print('Save analysis to cache folder')
        data_io.write(f, analysis)

    # Done.
    return analysis
def _epsilon_lrp(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140

    Epsilon-stabilized LRP backward pass through a sum-pooling layer:
    relevance R (shaped like the layer output self.Y) is redistributed
    onto the layer input self.X proportionally to each input's
    contribution.  As a side effect, intermediate relevance maps are
    appended column-wise and dumped to ../r_array/*.npy for debugging.

    Parameters
    ----------
    R : numpy.ndarray
        Relevance of the layer output, shape (N, Hout, Wout, D).
    epsilon : float
        Stabilizer added to the denominator, signed to match self.Y.

    Returns
    -------
    Rx : numpy.ndarray
        Relevance redistributed to the layer input, same shape as self.X.
    '''
    N, H, W, D = self.X.shape
    hpool, wpool = self.pool
    hstride, wstride = self.stride

    #assume the given pooling and stride parameters are carefully chosen.
    Hout = int((H - hpool) / hstride + 1)
    Wout = int((W - wpool) / wstride + 1)

    Rx = np.zeros(self.X.shape)
    normalizer = 1. / np.sqrt(
        hpool * wpool)  #factor in normalizer applied to Y in the forward pass
    # Divide out the (de-normalized) output; the epsilon term keeps the sign
    # of Y so the stabilizer never cancels the denominator.
    R_norm = R / (self.Y / normalizer + epsilon * ((self.Y >= 0) * 2 - 1.))

    # Rsave aliases the all-zero Rx here; the reshape below copies, so the
    # saved buffer starts as a zero column that later iterations append to.
    # NOTE(review): the reshape targets ([10,40], [10,250], [28,280]) hard-code
    # one specific network/batch geometry -- confirm against the model config.
    Rsave = Rx
    if Rx.shape[2] == 2:
        Rsave3 = np.reshape(Rsave, [10, 40])
    elif Rx.shape[2] == 10:
        Rsave2 = np.reshape(Rsave, [10, 250])
    else:
        Rsave1 = np.reshape(Rsave, [28, 280])

    for i in range(Hout):
        for j in range(Wout):
            # Input activations of the current pooling window.
            Z = self.X[:, i * hstride:i * hstride + hpool,
                       j * wstride:j * wstride + wpool, :]  #input activations.
            # Per-element relevance message for this window.
            sp_check = Z * (R_norm[:, i:i + 1, j:j + 1, :])
            Rx[:, i * hstride:i * hstride + hpool:,
               j * wstride:j * wstride + wpool:, :] += sp_check

            # Debug dump: append the current Rx snapshot as extra columns and
            # overwrite the per-layer-size .npy file each iteration.
            if Rx.shape[2] == 2:
                Rlim3 = np.reshape(Rx, [10, 40])
                Rsave3 = np.concatenate((Rsave3, Rlim3), axis=1)
                Rfile3 = '../r_array/sumpol_3.npy'
                data_io.write(Rsave3, Rfile3)
            elif Rx.shape[2] == 10:
                Rlim2 = np.reshape(Rx, [10, 250])
                Rsave2 = np.concatenate((Rsave2, Rlim2), axis=1)
                Rfile2 = '../r_array/sumpol_2.npy'
                data_io.write(Rsave2, Rfile2)
            else:
                Rlim1 = np.reshape(Rx, [28, 280])
                Rsave1 = np.concatenate((Rsave1, Rlim1), axis=1)
                Rfile1 = '../r_array/sumpol_1.npy'
                data_io.write(Rsave1, Rfile1)

    # Final relevance map plus the last window's intermediates, for inspection.
    data_io.write(Rx, '../r_array/sumpoll.npy')
    sp_c = '../r_array/x_lrp_sumpool_weight({},{},{}).npy'.format(
        sp_check.shape[0], sp_check.shape[1], sp_check.shape[2])
    data_io.write(sp_check, sp_c)
    xs_lrp = '../r_array/x_lrp_sumpool_input({},{},{}).npy'.format(
        Z.shape[0], Z.shape[1], Z.shape[2])
    data_io.write(Z, xs_lrp)
    lrps_check = '../r_array/sumpool_lrp({},{},{}).npy'.format(
        Rx.shape[0], Rx.shape[1], Rx.shape[2])
    data_io.write(Rx, lrps_check)
    return Rx
Y_train, test_size=0.2, random_state=42) model.fit_generator(aug.flow(XTrain, YTrain, batch_size=64), shuffle=True, epochs=1000, steps_per_epoch=len(XTrain) // 64, validation_data=(XTest, YTest), callbacks=[es, mc]) saved_model = load_model('saved_model/best_model.h5') Y_hat_train = saved_model.predict(X_train) Y_hat_valid = saved_model.predict(X_valid) Y_hat_test = saved_model.predict(X_test) results_name = results_dir + data_name write(results_name + '_train.predict', Y_hat_train) write(results_name + '_valid.predict', Y_hat_valid) write(results_name + '_test.predict', Y_hat_test) metric_name, scoring_function = 'auc_binary', roc_auc_score print('Training score for the', metric_name, 'metric = %5.4f' % scoring_function(Y_train, Y_hat_train)) # print('Valid score for the', metric_name, 'metric = %5.4f' % scoring_function(Y_valid, Y_hat_valid)) # print('Test score for the', metric_name, 'metric = %5.4f' % scoring_function(Y_test, Y_hat_test)) print('Ideal score for the', metric_name, 'metric = %5.4f' % scoring_function(Y_train, Y_train))
tiles_grid, tiles_rack = tiles.carve_tiles(img, info) print(fname_img) # Save specified tiles to files. for label, ij in info_img.items(): print(ij) for mn, tile in tiles_grid: # Got a match? if ij == mn: print(label) fname = 'tile_grid_%s.png' % (label) f = os.path.join(path_data, 'tiles', fname) io.write(f, tile) for fname_img, info_img in info['reference']['rack'].items(): f = os.path.join(path_data, 'reference', fname_img) img, meta = io.read(f) tiles_grid, tiles_rack = tiles.carve_tiles(img, info) # Save specified tiles to files. for label, ij in info_img.items(): for mn, tile in tiles_rack: # Got a match? if ij == mn: print(label)
def blender(sd, srd, Nworkers, stop_writing, output_dir, basename, Lstart,
            Ltime_budget, Lfold):
    """Continuously blend worker predictions and write prediction files.

    Python 2 code (print statements).  Runs forever in a polling loop:
    every ~0.5 s it collects finished workers from the shared object `sd`
    (attributes worker0..workerN-1) and raw models from `srd`, keeps the
    best one or two per blend group, scores 2- and 3-way blends with
    blend2/blend3, and whenever the blended score improves writes
    <basename>_valid_NNN.predict / <basename>_test_NNN.predict into
    output_dir -- unless the `stop_writing` event is set.

    Parameters: sd/srd are shared state proxies; Nworkers the worker count;
    stop_writing a threading/multiprocessing Event; Lstart/Ltime_budget the
    run start time and budget (caps output at ~100 prediction cycles);
    Lfold the CV fold count (Lfold > 1 scores against all of sd.yt_raw).
    """
    try:
        # Second half of the training labels is the holdout when Lfold == 1.
        split = int(len(sd.LD.data['Y_train']) * 0.5)
        cycle = 1  #cycle 0 is all zeros
        best_score = 0
        atbest = 0
        while (1):
            try:
                time.sleep(0.5)
                # limit to 100 predictions
                if cycle > (time.time() - Lstart) / Ltime_budget * 100:
                    time.sleep(1)
                    continue
                temp_workers_data = []
                workers_data = []
                # Py2 exec-statement assigns wr_data in the local scope;
                # collects every worker that has reported done > 0.
                for wr_no in range(Nworkers):
                    exec("wr_data = sd.worker" + str(wr_no))
                    if wr_data['done'] > 0:
                        temp_workers_data.append(wr_data)
                # Keep the best (and, when present, second-best) worker of
                # each blend group; bare except covers groups of size 1.
                wgroups = [i['blend_group'] for i in temp_workers_data]
                for group in np.unique(wgroups):
                    twdata = [
                        i for i in temp_workers_data
                        if i['blend_group'] == group
                    ]
                    twdata = sorted(twdata, key=itemgetter('score'),
                                    reverse=True)
                    workers_data.append(twdata[0])
                    try:
                        workers_data.append(twdata[1])
                    except:
                        pass
                    print group, len(twdata), len(workers_data)
                # this is patch for codalab VM
                # Raw models live as fixed attributes on srd; take up to the
                # three best of those that are done.
                workers_data_raw = []
                raw0_data = srd.raw_model
                if raw0_data['done'] == 1:
                    workers_data_raw.append(raw0_data)
                raw1_data = srd.raw_model1
                if raw1_data['done'] == 1:
                    workers_data_raw.append(raw1_data)
                raw2_data = srd.raw_model2
                if raw2_data['done'] == 1:
                    workers_data_raw.append(raw2_data)
                raw3_data = srd.raw_model3
                if raw3_data['done'] == 1:
                    workers_data_raw.append(raw3_data)
                raw4_data = srd.raw_model4
                if raw4_data['done'] == 1:
                    workers_data_raw.append(raw4_data)
                if len(workers_data_raw) > 0:
                    workers_data_raw = sorted(workers_data_raw,
                                              key=itemgetter('score'),
                                              reverse=True)
                    workers_data.append(workers_data_raw[0])
                    try:
                        workers_data.append(workers_data_raw[1])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[2])
                    except:
                        pass
                workers_data = sorted(workers_data, key=itemgetter('score'),
                                      reverse=True)
                if len(workers_data) > 0:
                    # Baseline: the single best worker's predictions.
                    worker0 = workers_data[0]
                    preds_valid = worker0['preds_valid']
                    preds_test = worker0['preds_test']
                    y = sd.yt_raw[split:]
                    if Lfold > 1:
                        y = sd.yt_raw
                    x = worker0['preds_2fld']
                    # Score with the dataset's metric from libscores (Py2
                    # exec-statement binds s0 locally).
                    exec('s0 = libscores.' + sd.LD.info['metric'] + '(y, x)')
                    best_score = s0
                    #short run can't wait for blend (usable only for AutoML 1)
                    try:
                        if s0 > atbest and cycle < 2:
                            atbest = best_score * 0.9  #not reilable score
                            if sd.LD.info['target_num'] == 1:
                                preds_valid = preds_valid[:, 1]
                                preds_test = preds_test[:, 1]
                            preds_valid = np.clip(preds_valid, 0, 1)
                            preds_test = np.clip(preds_test, 0, 1)
                            filename_valid = basename + '_valid_' + str(
                                cycle).zfill(3) + '.predict'
                            data_io.write(
                                os.path.join(output_dir, filename_valid),
                                preds_valid)
                            filename_test = basename + '_test_' + str(
                                cycle).zfill(3) + '.predict'
                            data_io.write(
                                os.path.join(output_dir, filename_test),
                                preds_test)
                            cycle += 1
                    except:
                        pass
                    # Try all 2-way blends of the top Lsample workers, then
                    # all 3-way blends of the top Lsample-1; keep the best.
                    if Lfold < 4:
                        Lsample = 4
                    else:
                        Lsample = 6
                    xa = 0
                    Lssample = Lsample - 1
                    for iter_worker in itertools.combinations(
                            workers_data[:Lsample], 2):
                        xa = xa + 1
                        worker0 = iter_worker[0]
                        worker1 = iter_worker[1]
                        s01, validt, testt = blend2(
                            worker0['preds_2fld'], worker1['preds_2fld'], y,
                            sd.LD.info['metric'], worker0['preds_valid'],
                            worker1['preds_valid'], worker0['preds_test'],
                            worker1['preds_test'])
                        if s01 > best_score:
                            best_score = s01
                            preds_valid = validt
                            preds_test = testt
                    xa = 0
                    for iter_worker in itertools.combinations(
                            workers_data[:Lssample], 3):
                        xa = xa + 1
                        worker0 = iter_worker[0]
                        worker1 = iter_worker[1]
                        worker2 = iter_worker[2]
                        s012, validt, testt = blend3(
                            worker0['preds_2fld'], worker1['preds_2fld'],
                            worker2['preds_2fld'], y, sd.LD.info['metric'],
                            worker0['preds_valid'], worker1['preds_valid'],
                            worker2['preds_valid'], worker0['preds_test'],
                            worker1['preds_test'], worker2['preds_test'])
                        if s012 > best_score:
                            best_score = s012
                            preds_valid = validt
                            preds_test = testt
                    # Only write while the shutdown event is clear.
                    if stop_writing.is_set(
                    ) == False:  #until last 10 seconds (event signal)
                        if best_score > atbest:
                            atbest = best_score
                            print "naj =", workers_data[0][
                                'score'], best_score, atbest
                            if sd.LD.info['target_num'] == 1:
                                preds_valid = preds_valid[:, 1]
                                preds_test = preds_test[:, 1]
                            preds_valid = np.clip(preds_valid, 0, 1)
                            preds_test = np.clip(preds_test, 0, 1)
                            filename_valid = basename + '_valid_' + str(
                                cycle).zfill(3) + '.predict'
                            data_io.write(
                                os.path.join(output_dir, filename_valid),
                                preds_valid)
                            filename_test = basename + '_test_' + str(
                                cycle).zfill(3) + '.predict'
                            data_io.write(
                                os.path.join(output_dir, filename_test),
                                preds_test)
                            cycle += 1
                    else:
                        print 'stop writing is set'
            except Exception as e:
                print 'exception in blender process' + ' ' + str(e)
                # in case of any problem, let's try again
    except Exception as e:
        print 'exception in blender main process' + ' ' + str(e)
(time.time() - start)) # Make predictions # ----------------- Y_train = M_clf.predict(D.data['X_train']) # Y_valid = M_clf.predict(D.data['X_valid']) Y_test = M_clf.predict(D.data['X_test']) print("[+] Prediction success, time spent so far %5.2f sec" % (time.time() - start)) # Write results # ------------- filename_train = basename + '_train.predict' # filename_valid = basename + '_valid.predict' filename_test = basename + '_test.predict' print(verbose, "======== Saving results to: " + output_dir) data_io.write(os.path.join(output_dir, filename_train), Y_train) # data_io.write(os.path.join(output_dir,filename_valid), Y_valid) data_io.write(os.path.join(output_dir, filename_test), Y_test) print("[+] Results saved, time spent so far %5.2f sec" % (time.time() - start)) time_spent = time.time() - start time_left_over = time_budget - time_spent print("[+] End cycle, time left %5.2f sec" % time_left_over) if time_left_over <= 0: exit() time_spent = time.time() - start time_left_over = time_budget - time_spent overall_time_spent = time.time() - overall_start if execution_success:
# Sample-submission script body: writes random predictions for every dataset
# found under input_dir (argv[1]) into output_dir (argv[2]).
input_dir = argv[1]
datanames = data_io.inventory_data(input_dir)
# The output directory will contain the scores, create it if it does not exist
output_dir = argv[2]
data_io.mkdir(output_dir)
if len(datanames) == 0:
    print("****** No data found ******")
# Loop over datasets
for basename in datanames:
    print("****** Processing " + basename.capitalize() + " ******")
    # Fake predictions on validation and test data: uniform random values,
    # one per row of the corresponding data matrix (random is numpy.random
    # here -- rand(n) draws from [0, 1)).
    X = data_io.data(
        path.join(input_dir, basename, basename + '_valid.data'))
    Yvalid = random.rand(X.shape[0])
    X = data_io.data(
        path.join(input_dir, basename, basename + '_test.data'))
    Ytest = random.rand(X.shape[0])
    # Write results to files
    data_io.write(path.join(output_dir, basename + '_valid.predict'),
                  Yvalid)
    data_io.write(path.join(output_dir, basename + '_test.predict'), Ytest)
# Lots of debug code...
data_io.show_io(input_dir, output_dir)
data_io.show_version()
exit(0)
def predict (LD, output_dir, basename):
    """Train dataset-specific model ensembles and write prediction files.

    Python 2 code (print statements).  Dispatches on the dataset name in
    LD.info['name'] ('alexis', 'dionis', 'grigoris', 'jannis', 'wallis'),
    each branch fitting a hand-tuned ensemble and filling preds_valid /
    preds_test; finally writes <basename>_valid_000.predict and
    <basename>_test_000.predict into output_dir.

    NOTE(review): if LD.info['name'] matches none of the five branches,
    preds_valid/preds_test are unbound and the tail raises NameError.
    """
    import copy
    import os
    import numpy as np
    import libscores
    import data_converter
    from sklearn import preprocessing, ensemble
    from sklearn.utils import shuffle

    # Deterministic shuffle of the training set.
    LD.data['X_train'], LD.data['Y_train'] = shuffle(LD.data['X_train'],
                                                     LD.data['Y_train'] ,
                                                     random_state=1)
    Y_train = LD.data['Y_train']
    X_train = LD.data['X_train']
    Xta = np.copy(X_train)
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']
    Xtv = np.copy(X_valid)
    Xts = np.copy(X_test)
    import xgboost as xgb

    # alexis: average of two large random forests (multi-label:
    # predict_proba returns one array per label, hence the [:, :, 1].T).
    if LD.info['name']== 'alexis':
        model = ensemble.RandomForestClassifier(max_depth=140,
                                                n_estimators=1800, n_jobs=-1,
                                                random_state=0, verbose=0,
                                                warm_start=True)
        model2 = ensemble.RandomForestClassifier(max_depth=140,
                                                 n_estimators=1800, n_jobs=-1,
                                                 random_state=1, verbose=0,
                                                 warm_start=True)
        model.fit(X_train, Y_train)
        model2.fit(X_train, Y_train)
        preds_valid0 = model.predict_proba(X_valid)
        preds_test0 = model.predict_proba(X_test)
        preds_valid2 = model2.predict_proba(X_valid)
        preds_test2 = model2.predict_proba(X_test)
        preds_valid0 = np.array(preds_valid0)
        preds_valid2 = np.array(preds_valid2)
        preds_test0 = np.array(preds_test0)
        preds_test2 = np.array(preds_test2)
        preds_valid = (preds_valid0 + preds_valid2)/2
        preds_test = (preds_test0 + preds_test2)/2
        preds_valid = preds_valid[:, :, 1]
        preds_valid = preds_valid.T
        preds_test = preds_test[:, :, 1]
        preds_test = preds_test.T

    # dionis: sum of five random forests with different seeds.
    if LD.info['name']== 'dionis':
        Lest = 600
        #600 will consume cca 250 GB of RAM, use 50 for similar result
        #Lest = 50
        model = ensemble.RandomForestClassifier( n_jobs=-1, n_estimators=Lest,
                                                random_state=0)
        model.fit(X_train, Y_train)
        preds_valid0 = model.predict_proba(X_valid)
        preds_test0 = model.predict_proba(X_test)
        model = ensemble.RandomForestClassifier( n_jobs=-1, n_estimators=Lest,
                                                random_state=1)
        model.fit(X_train, Y_train)
        preds_valid1 = model.predict_proba(X_valid)
        preds_test1 = model.predict_proba(X_test)
        model = ensemble.RandomForestClassifier( n_jobs=-1, n_estimators=Lest,
                                                random_state=2)
        model.fit(X_train, Y_train)
        preds_valid2 = model.predict_proba(X_valid)
        preds_test2 = model.predict_proba(X_test)
        model = ensemble.RandomForestClassifier( n_jobs=-1, n_estimators=Lest,
                                                random_state=3)
        model.fit(X_train, Y_train)
        preds_valid3 = model.predict_proba(X_valid)
        preds_test3 = model.predict_proba(X_test)
        model = ensemble.RandomForestClassifier( n_jobs=-1, n_estimators=Lest,
                                                random_state=4)
        model.fit(X_train, Y_train)
        preds_valid4 = model.predict_proba(X_valid)
        preds_test4 = model.predict_proba(X_test)
        # Author-acknowledged bug kept for reproducibility: the sum is never
        # divided by 5, so these are unnormalized probability sums.
        preds_valid = (preds_valid0 + preds_valid1 + preds_valid2 +
                       preds_valid3 + preds_valid4)  # /5 should be included (bug)
        preds_test = (preds_test0 + preds_test1 + preds_test2 + preds_test3 +
                      preds_test4)  # /5 should be included (bug)

    # grigoris: per-label average of three forests and one L1 logistic
    # regression (one-vs-rest over the label columns).
    if LD.info['name']== 'grigoris':
        model = ensemble.RandomForestClassifier(criterion='entropy',
                                                max_features=0.05,
                                                max_depth=5, n_estimators=120,
                                                n_jobs=-1, random_state=0,
                                                verbose=0)
        model2 = linear_model.LogisticRegression(penalty='l1', random_state=1,
                                                 n_jobs=-1, C=0.008)
        model3 = ensemble.RandomForestClassifier(criterion='entropy',
                                                 max_features=0.05,
                                                 max_depth=5, n_estimators=120,
                                                 n_jobs=-1, random_state=1,
                                                 verbose=0)
        model4 = ensemble.RandomForestClassifier(criterion='entropy',
                                                 max_features=0.05,
                                                 max_depth=5, n_estimators=120,
                                                 n_jobs=-1, random_state=2,
                                                 verbose=0)
        preds_valid = np.zeros((X_valid.shape[0], Y_train.shape[1]))
        preds_test = np.zeros((X_test.shape[0], Y_train.shape[1]))
        for pyt in range(Y_train.shape[1]):
            print pyt
            ytp = Y_train[:, pyt]
            model.fit(X_train, ytp)
            model2.fit(X_train, ytp)
            model3.fit(X_train, ytp)
            model4.fit(X_train, ytp)
            preds1v= model.predict_proba (X_valid)[:, 1]
            preds2v= model2.predict_proba (X_valid)[:, 1]
            preds3v= model3.predict_proba (X_valid)[:, 1]
            preds4v= model4.predict_proba (X_valid)[:, 1]
            predsv = (preds1v + preds2v + preds3v + preds4v)/4
            preds_valid[:, pyt] = predsv
            preds1t= model.predict_proba (X_test)[:, 1]
            preds2t= model2.predict_proba (X_test)[:, 1]
            preds3t= model3.predict_proba (X_test)[:, 1]
            preds4t= model4.predict_proba (X_test)[:, 1]
            predst = (preds1t + preds2t + preds3t + preds4t)/4
            preds_test[:, pyt] = predst

    # jannis: oversample classes 0 (x2^... via 18 doublings of the running
    # set) and 2, binarize labels, then one XGBoost per binary column.
    if LD.info['name']== 'jannis':
        Xd = X_train[Y_train==0]
        yd = Y_train[Y_train==0]
        for a in range(18):
            X_train = np.vstack([X_train, Xd])
            Y_train = np.hstack([Y_train, yd])
        Xd = X_train[Y_train==2]
        yd = Y_train[Y_train==2]
        X_train = np.vstack([X_train, Xd])
        Y_train = np.hstack([Y_train, yd])
        Y_train_raw = np.array(data_converter.convert_to_bin(Y_train,
                               len(np.unique(Y_train)), False))
        preds_valid = np.zeros((X_valid.shape[0], Y_train_raw.shape[1]))
        preds_test = np.zeros((X_test.shape[0], Y_train_raw.shape[1]))
        for pyt in range(Y_train_raw.shape[1]):
            # Column 0 gets a lower base_score prior than the rest.
            if pyt == 0:
                Lbs = 0.2
            else:
                Lbs = 0.5
            model = xgb.XGBClassifier(max_depth=30, learning_rate=0.05,
                                      n_estimators=100, silent=True,
                                      objective='binary:logistic', nthread=-1,
                                      gamma=0, min_child_weight=80,
                                      max_delta_step=1, subsample=1,
                                      colsample_bytree=1, base_score=Lbs,
                                      seed=0, missing=None)
            ytp = Y_train_raw[:, pyt]
            model.fit(X_train, ytp)
            preds1v= model.predict_proba (X_valid)[:, 1]
            preds_valid[:, pyt] = preds1v
            preds1t= model.predict_proba (X_test)[:, 1]
            preds_test[:, pyt] = preds1t

    # wallis: average of multinomial naive Bayes and a softprob XGBoost.
    if LD.info['name']== 'wallis':
        model = naive_bayes.MultinomialNB(alpha=0.02)
        model2 = xgb.XGBClassifier(max_depth=5, learning_rate=0.05,
                                   n_estimators=1200, silent=True,
                                   objective='multi:softprob', nthread=-1,
                                   gamma=0, min_child_weight=1,
                                   max_delta_step=0, subsample=1,
                                   colsample_bytree=1, base_score=0.5, seed=0,
                                   missing=None)
        model.fit(X_train, Y_train)
        preds_valid1 = model.predict_proba(X_valid)
        preds_test1 = model.predict_proba(X_test)
        model2.fit(X_train, Y_train)
        preds_valid2 = model2.predict_proba(X_valid)
        preds_test2 = model2.predict_proba(X_test)
        preds_valid = (preds_valid1 +preds_valid2)/2
        preds_test = (preds_test1 +preds_test2)/2

    import data_io
    # Binary target: keep only the positive-class column.
    if LD.info['target_num'] == 1:
        preds_valid = preds_valid[:,1]
        preds_test = preds_test[:,1]
    preds_valid = np.clip(preds_valid,0,1)
    preds_test = np.clip(preds_test,0,1)
    data_io.write(os.path.join(output_dir, basename + '_valid_000.predict'),
                  preds_valid)
    data_io.write(os.path.join(output_dir,basename + '_test_000.predict'),
                  preds_test)
def _epsilon_lrp(self, R, epsilon):
    '''
    LRP according to Eq(58) in DOI: 10.1371/journal.pone.0130140

    Epsilon-stabilized LRP backward pass through this convolution layer:
    output relevance R is redistributed onto the layer input self.X via
    the per-window products W * X (or the precomputed self.Z when
    self.lrp_aware is set).  Intermediate relevance maps are appended
    column-wise and dumped to ../r_array/*.npy for offline inspection.

    Parameters
    ----------
    R : numpy.ndarray
        Output relevance, shape (N, Hout, Wout, NF) matching self.Y.
    epsilon : float
        Stabilizer added to the denominator, signed like self.Y.

    Returns
    -------
    Rx : numpy.ndarray
        Input relevance, same shape as self.X.
    '''
    N, Hout, Wout, NF = R.shape
    hf, wf, df, NF = self.W.shape
    hstride, wstride = self.stride

    # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented drop-in replacement.
    Rx = np.zeros_like(self.X, dtype=float)
    # Signed epsilon keeps the stabilizer from cancelling the denominator.
    R_norm = R / (self.Y + epsilon * ((self.Y >= 0) * 2 - 1.))

    # Rsave aliases the all-zero Rx; the reshape copies, so each debug
    # buffer starts as a zero column that iterations append to.
    # NOTE(review): the reshape targets ([10,10], [25,25], [196,10],
    # [1024,1]) hard-code one network/batch geometry -- confirm.
    Rsave = Rx
    if Rx.shape[2] == 1:
        Rsave4 = np.reshape(Rsave, [10, 10])
    elif Rx.shape[2] == 5:
        Rsave3 = np.reshape(Rsave, [25, 25])
    elif Rx.shape[2] == 14:
        Rsave2 = np.reshape(Rsave, [196, 10])
    else:
        Rsave1 = np.reshape(Rsave, [1024, 1])

    for i in range(Hout):
        for j in range(Wout):
            # Forward messages of the current window: reuse the cache when
            # lrp_aware, otherwise recompute W (broadcast over batch) times
            # the input patch (broadcast over filters).
            if self.lrp_aware:
                Z = self.Z[:, i, j, ...]
            else:
                Z = self.W[na, ...] * self.X[:, i * hstride:i * hstride + hf,
                                             j * wstride:j * wstride + wf, :,
                                             na]
            weight_lrp = Z * (R_norm[:, i:i + 1, j:j + 1, na, :])
            # Sum over the filter axis to get per-input-pixel relevance.
            Rx[:, i * hstride:i * hstride + hf:,
               j * wstride:j * wstride + wf:, :] += (weight_lrp).sum(axis=4)

            # Debug dump: append the current Rx snapshot and overwrite the
            # per-layer-size .npy file each iteration.
            if Rx.shape[2] == 1:
                Rlim4 = np.reshape(Rx, [10, 10])
                Rsave4 = np.concatenate((Rsave4, Rlim4), axis=1)
                Rfile4 = '../r_array/convolution_4.npy'
                data_io.write(Rsave4, Rfile4)
            elif Rx.shape[2] == 5:
                Rlim3 = np.reshape(Rx, [25, 25])
                Rsave3 = np.concatenate((Rsave3, Rlim3), axis=1)
                Rfile3 = '../r_array/convolution_3.npy'
                data_io.write(Rsave3, Rfile3)
            elif Rx.shape[2] == 14:
                Rlim2 = np.reshape(Rx, [196, 10])
                Rsave2 = np.concatenate((Rsave2, Rlim2), axis=1)
                Rfile2 = '../r_array/convolution_2.npy'
                data_io.write(Rsave2, Rfile2)
            else:
                Rlim1 = np.reshape(Rx, [1024, 1])
                Rsave1 = np.concatenate((Rsave1, Rlim1), axis=1)
                Rfile1 = '../r_array/convolution_1.npy'
                data_io.write(Rsave1, Rfile1)

    # Final dumps: last window's messages, the layer input, and the result.
    weight_l = '../r_array/x_lrp_conv_weight({},{},{}).npy'.format(
        weight_lrp.shape[0], weight_lrp.shape[1], weight_lrp.shape[2])
    data_io.write(weight_lrp, weight_l)
    x_lrp = '../r_array/x_lrp_conv_input({},{},{}).npy'.format(
        self.X.shape[0], self.X.shape[1], self.X.shape[2])
    data_io.write(self.X, x_lrp)
    lrp_check = '../r_array/conv_lrp({},{},{}).npy'.format(
        Rx.shape[0], Rx.shape[1], Rx.shape[2])
    data_io.write(Rx, lrp_check)
    data_io.write(Rx, '../r_array/convolution.npy')
    return Rx
def _main(args):
    """Run the ingestion program for one AutoSpeech dataset.

    Reads the single dataset under args.dataset_dir, instantiates the
    participant's Model (imported from code_dir), then alternates
    M.train / M.test under a time budget, writing each prediction to
    args.output_dir as <task>.predict_<k>.  Writes start/end marker files
    and finally copies all outputs to args.score_dir.

    Parameters
    ----------
    args : argparse.Namespace
        Needs dataset_dir, output_dir, ingestion_program_dir, code_dir,
        score_dir and time_budget attributes.

    Raises
    ------
    ValueError
        If dataset_dir does not contain exactly one *.data dataset.
    """
    # Mark starting time of ingestion
    start = time.time()
    logger.info("=" * 5 + " Start ingestion program. ")

    #### Check whether everything went well
    ingestion_success = True

    dataset_dir = args.dataset_dir
    output_dir = args.output_dir
    ingestion_program_dir = args.ingestion_program_dir
    code_dir = args.code_dir
    score_dir = args.score_dir
    time_budget = args.time_budget

    if dataset_dir.endswith('run/input') and\
       code_dir.endswith('run/program'):
        logger.debug(
            "Since dataset_dir ends with 'run/input' and code_dir "
            "ends with 'run/program', suppose running on " +
            "CodaLab platform. Modify dataset_dir to 'run/input_data' "
            "and code_dir to 'run/submission'. " +
            "Directory parsing should be more flexible in the code of " +
            "compute worker: we need explicit directories for " +
            "dataset_dir and code_dir.")
        dataset_dir = dataset_dir.replace('run/input', 'run/input_data')
        code_dir = code_dir.replace('run/program', 'run/submission')

    # Show directories for debugging
    logger.debug("sys.argv = " + str(sys.argv))
    logger.debug("Using dataset_dir: " + dataset_dir)
    logger.debug("Using output_dir: " + output_dir)
    logger.debug("Using ingestion_program_dir: " + ingestion_program_dir)
    logger.debug("Using code_dir: " + code_dir)

    # Our libraries
    path.append(ingestion_program_dir)
    path.append(code_dir)
    #IG: to allow submitting the starting kit as sample submission
    path.append(code_dir + '/sample_code_submission')
    import data_io
    from dataset import AutoSpeechDataset  # THE class of AutoNLP datasets

    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(dataset_dir)
    #### Delete zip files and metadata file
    datanames = [x for x in datanames if x.endswith('.data')]

    if len(datanames) != 1:
        raise ValueError("{} datasets found in dataset_dir={}!\n"\
                         .format(len(datanames), dataset_dir) +
                         "Please put only ONE dataset under dataset_dir.")

    basename = datanames[0]
    D = AutoSpeechDataset(os.path.join(dataset_dir, basename))
    metadata = D.get_metadata()
    time_budget = metadata.get("time_budget", time_budget)
    logger.info("Time budget: {}".format(time_budget))

    write_start_file(output_dir,
                     start_time=start,
                     time_budget=time_budget,
                     task_name=basename.split('.')[0])

    logger.info("************************************************")
    logger.info("******** Processing dataset " +
                basename[:-5].capitalize() + " ********")
    logger.info("************************************************")

    ##### Begin creating training set and test set #####
    logger.info("Reading training set and test set...")
    D.read_dataset()
    ##### End creating training set and test set #####

    ## Get correct prediction shape
    num_examples_test = D.get_test_num()
    output_dim = D.get_class_num()
    correct_prediction_shape = (num_examples_test, output_dim)

    try:
        # ========= Creating a model
        timer = Timer()
        timer.set(
            20 * 60
        )  # 20 min for participants to initializing and install other packages
        with timer.time_limit("Importing model"):
            from model import Model  # in participants' model.py

        ##### Begin creating model #####
        logger.info("Creating model...")
        with timer.time_limit('Initialization'):
            M = Model(metadata)
        ###### End creating model ######
    except TimeoutException as e:
        # NOTE(review): if this fires before M is bound, the finally block
        # below hits a NameError on hasattr(M, ...), which is then caught
        # and reported by its inner `except Exception`.
        logger.info(
            "[-] Initialization phase exceeded time budget. Move to train/predict phase"
        )
    except Exception as e:
        logger.info("Failed to initializing model.")
        logger.error("Encountered exception:\n" + str(e), exc_info=True)
        raise
    finally:
        try:
            timer = Timer()
            timer.set(time_budget)

            # Check if the model has methods `train` and `test`.
            for attr in ['train', 'test']:
                if not hasattr(M, attr):
                    # FIX: the original never called .format(attr), so the
                    # message shipped with a literal `{}` placeholder.
                    raise ModelApiError(
                        "Your model object doesn't have the method " +
                        "`{}`. Please implement it in model.py.".format(attr))

            # Check if model.py uses new done_training API instead of marking
            # stopping by returning None
            use_done_training_api = hasattr(M, 'done_training')
            if not use_done_training_api:
                logger.warning(
                    "Your model object doesn't have an attribute " +
                    "`done_training`. But this is necessary for ingestion " +
                    "program to know whether the model has done training " +
                    "and to decide whether to proceed more training. " +
                    "Please add this attribute to your model.")

            # Keeping track of how many predictions are made
            prediction_order_number = 0

            # Start the CORE PART: train/predict process
            while (not (use_done_training_api and M.done_training)):
                # Train the model
                logger.info("Begin training the model...")
                remaining_time_budget = timer.remain
                with timer.time_limit('training'):
                    M.train(D.get_train(),
                            remaining_time_budget=timer.remain)
                logger.info("Finished training the model.")

                # Make predictions using the trained model
                logger.info("Begin testing the model by making predictions " +
                            "on test set...")
                remaining_time_budget = timer.remain
                with timer.time_limit('predicting'):
                    Y_pred = M.test(
                        D.get_test(),
                        remaining_time_budget=remaining_time_budget)
                logger.info("Finished making predictions.")

                if Y_pred is None:  # Stop train/predict process if Y_pred is None
                    logger.info("The method model.test returned `None`. " +
                                "Stop train/predict process.")
                    break
                else:  # Check if the prediction has good shape
                    prediction_shape = tuple(Y_pred.shape)
                    if prediction_shape != correct_prediction_shape:
                        raise BadPredictionShapeError(
                            "Bad prediction shape! Expected {} but got {}."\
                            .format(correct_prediction_shape, prediction_shape)
                        )

                # Write timestamp to 'start.txt'
                write_timestamp(output_dir,
                                predict_idx=prediction_order_number,
                                timestamp=timer.exec)

                # Prediction files: adult.predict_0, adult.predict_1, ...
                filename_test = basename[:-5] + '.predict_' +\
                    str(prediction_order_number)
                # Write predictions to output_dir
                tmp_pred = np.argmax(Y_pred, axis=1)
                # data_io.write(os.path.join(output_dir,filename_test), Y_pred)
                data_io.write(os.path.join(output_dir, filename_test),
                              tmp_pred)
                prediction_order_number += 1
                logger.info("[+] {0:d} predictions made, time spent so far {1:.2f} sec"\
                            .format(prediction_order_number, time.time() - start))
                logger.info("[+] Time left {0:.2f} sec".format(timer.remain))
        except TimeoutException as e:
            logger.info(
                "[-] Ingestion program exceeded time budget. Predictions "
                "made so far will be used for evaluation.")
        except Exception as e:
            ingestion_success = False
            logger.info("Failed to run ingestion.")
            logger.error("Encountered exception:\n" + str(e), exc_info=True)
            raise
        finally:
            # Finishing ingestion program
            end_time = time.time()
            overall_time_spent = end_time - start

            # Write overall_time_spent to a end.txt file
            end_filename = 'end.txt'
            with open(os.path.join(output_dir, end_filename), 'w') as f:
                f.write('ingestion_duration: ' + str(overall_time_spent) +
                        '\n')
                f.write('ingestion_success: ' + str(int(ingestion_success)) +
                        '\n')
                f.write('end_time: ' + str(end_time) + '\n')
            logger.info("Wrote the file {} marking the end of ingestion."\
                        .format(end_filename))

            if ingestion_success:
                logger.info(
                    "[+] Done. Ingestion program successfully terminated.")
                logger.info("[+] Overall time spent %5.2f sec " %
                            overall_time_spent)
            else:
                logger.info(
                    "[-] Done, but encountered some errors during ingestion.")
                logger.info("[-] Overall time spent %5.2f sec " %
                            overall_time_spent)

            # Copy all files in output_dir to score_dir
            os.system("cp -R {} {}".format(os.path.join(output_dir, '*'),
                                           score_dir))
            logger.debug(
                "Copied all ingestion output to scoring output directory.")
            logger.info("[Ingestion terminated]")
def forward(self, X, lrp_aware=False):
    '''
    Realizes the forward pass of an input through the convolution layer.

    Parameters
    ----------
    X : numpy.ndarray
        a network input, shaped (N,H,W,D), with
        N = batch size
        H, W, D = input size in heigth, width, depth

    lrp_aware : bool
        controls whether the forward pass is to be computed with awareness for multiple following
        LRP calls. this will sacrifice speed in the forward pass but will save time if multiple LRP
        calls will follow for the current X, e.g. wit different parameter settings or for multiple
        target classes.

    Returns
    -------
    Y : numpy.ndarray
        the layer outputs, shaped (N, Hout, Wout, n_filters).

    Note
    ----
    Side effects: keeps references to X (self.X), the outputs (self.Y) and,
    when lrp_aware is set, the per-position forward messages (self.Z); also
    dumps X and Y to .npy files under '../r_array/' via data_io.write
    (assumes that directory exists relative to the CWD — TODO confirm).
    '''
    self.lrp_aware = lrp_aware
    self.X = X
    N, H, W, D = X.shape

    # filter bank shape: height, width, depth, number of filters
    hf, wf, df, nf = self.W.shape
    hstride, wstride = self.stride
    numfilters = self.n

    #assume the given pooling and stride parameters are carefully chosen.
    # "valid" convolution output size (no padding).
    Hout = (H - hf) // hstride + 1
    Wout = (W - wf) // wstride + 1

    #initialize pooled output
    self.Y = np.zeros((N, Hout, Wout, numfilters))

    if self.lrp_aware:
        # Precompute the individual forward messages Z[n,i,j,h,w,d,f] so
        # later LRP calls can reuse them instead of recomputing products.
        self.Z = np.zeros((N, Hout, Wout, hf, wf, df, nf)) #initialize container for precomputed forward messages
        for i in range(Hout):
            for j in range(Wout):
                # broadcast W over the batch axis and X over the filter axis
                self.Z[:, i, j, ...] = self.W[na, ...] * self.X[:, i * hstride:i * hstride + hf, j * wstride:j * wstride + wf, :, na] # N, hf, wf, df, nf
                self.Y[:, i, j, :] = self.Z[:, i, j, ...].sum(axis=(1, 2, 3)) + self.B
    else:
        # Plain convolution: contract each receptive field with the filter
        # bank in one tensordot over (h, w, d).
        for i in range(Hout):
            for j in range(Wout):
                self.Y[:, i, j, :] = np.tensordot(
                    X[:, i * hstride:i * hstride + hf:, j * wstride:j * wstride + wf:, :],
                    self.W,
                    axes=([1, 2, 3], [0, 1, 2])) + self.B

    # Debug dumps of the layer input/output, file names encode the shapes.
    input_check = '../r_array/conv_input({},{},{}).npy'.format(
        self.X.shape[0], self.X.shape[1], self.X.shape[2])
    data_io.write(self.X, input_check)
    # NOTE(review): X and self.X are the same object here, so this second
    # dump duplicates the first under a different name — confirm intent.
    kind_check = '../r_array/conv_check_input({},{},{}).npy'.format(
        X.shape[0], X.shape[1], X.shape[2])
    data_io.write(X, kind_check)
    output_check = '../r_array/conv_output({},{},{}).npy'.format(
        self.Y.shape[0], self.Y.shape[1], self.Y.shape[2])
    data_io.write(self.Y, output_check)
    return self.Y
raise ValueError("Wrong set type, should be `train` or `test`!") # when the task if binary.classification or regression, transform it to multilabel if task == 'regression': labels = regression_to_multilabel(labels) elif task == 'binary.classification': labels = binary_to_multilabel(labels) return features, labels if __name__ == '__main__': input_dir = '../../../autodl-contrib/raw_datasets/automl' output_dir = '../' for dataset_name in ['dorothea', 'adult']: D = DataManager(dataset_name, input_dir, replace_missing=False, verbose=verbose) X_test, Y_test = _prepare_metadata_features_and_labels(D, set_type='test') X_train, Y_train = _prepare_metadata_features_and_labels( D, set_type='train') print(Y_test.shape) time_budget = 7200 model = AutoSklearnClassifier(time_left_for_this_task=time_budget, per_run_time_limit=time_budget // 10) model.fit(X_train, Y_train) predict_path = os.path.join(output_dir, dataset_name + '.predict') Y_hat_test = model.predict_proba(X_test) print(Y_hat_test.shape) data_io.write(predict_path, Y_hat_test)
time_spent = time.time() - start vprint( verbose, "time spent %5.2f sec" %time_spent) vprint( verbose, "======== Creating model ==========") train_data = D.data['X_train'] labels = D.data['Y_train'] valid_data = D.data['X_valid'] test_data = D.data['X_test'] print (train_data.shape) print (valid_data.shape) print (test_data.shape) print (labels.shape) time_spent = 0 # Initialize time spent learning #if basename in ["albert","dilbert","fabert","robert","volkert"]: (Y_valid, Y_test) = locals()[basename+"_predict"](train_data,labels, valid_data, test_data,output_dir, D.info['time_budget'],D.info['target_num'],D.info['is_sparse']) time_spent = time.time() - start vprint( verbose, "[+] Prediction success, time spent so far %5.2f sec" % (time.time() - start)) # Write results filename_valid = basename + '_valid_' + '.predict' data_io.write(os.path.join(output_dir,filename_valid), Y_valid) filename_test = basename + '_test_' + '.predict' data_io.write(os.path.join(output_dir,filename_test), Y_test) vprint( verbose, "[+] Results saved, time spent so far %5.2f sec" % (time.time() - start)) time_spent = time.time() - start overall_time_spent = time.time() - overall_start vprint( verbose, "[+] Done") vprint( verbose, "[+] Overall time spent %5.2f sec " % overall_time_spent)
#R = nn.lrp(ypred,'alphabeta',2) #as Eq(60) from DOI: 10.1371/journal.pone.0130140 #R = nn.lrp(Y[na,i]) #compute first layer relevance according to the true class label ''' yselect = 3 yselect = (np.arange(Y.shape[1])[na,:] == yselect)*1. R = nn.lrp(yselect) #compute first layer relvance for an arbitrarily selected class ''' #undo input normalization for digit drawing. get it back to range [0,1] per pixel x = (x + 1.) / 2. #render input and heatmap as rgb images digit = render.digit_to_rgb(x, scaling=3) hm = render.hm_to_rgb(R, X=x, scaling=3, sigma=2) digit_hm = render.save_image([digit, hm], '../heatmap.png') data_io.write(R, '../heatmap.npy') #display the image as written to file plt.imshow(digit_hm, interpolation='none') plt.axis('off') plt.show() #note that modules.Sequential allows for batch processing inputs ''' x = X[:10,:] y = nn.forward(x) R = nn.lrp(y) data_io.write(R,'../Rbatch.npy') '''
break else: # Check if the prediction has good shape prediction_shape = tuple(Y_pred.shape) if prediction_shape != correct_prediction_shape: raise BadPredictionShapeError( "Bad prediction shape! Expected {} but got {}."\ .format(correct_prediction_shape, prediction_shape) ) # Write timestamp to 'start.txt' write_timestamp(output_dir, predict_idx=prediction_order_number, timestamp=timer.exec) # Prediction files: adult.predict_0, adult.predict_1, ... filename_test = basename[:-5] + '.predict_' +\ str(prediction_order_number) # Write predictions to output_dir data_io.write(os.path.join(output_dir,filename_test), Y_pred) prediction_order_number += 1 logger.info("[+] {0:d} predictions made, time spent so far {1:.2f} sec"\ .format(prediction_order_number, time.time() - start)) logger.info("[+] Time left {0:.2f} sec".format(timer.remain)) except TimeoutException as e: logger.info("[-] Ingestion program exceeded time budget. Predictions " "made so far will be used for evaluation.") except Exception as e: ingestion_success = False logger.info("Failed to run ingestion.") logger.error("Encountered exception:\n" + str(e), exc_info=True) # Finishing ingestion program end_time = time.time() overall_time_spent = end_time - start
def predict (LD, output_dir, basename):
    # Train a 2-hidden-layer lasagne/theano MLP on LD and write validation
    # and test predictions to output_dir as '<basename>_{valid,test}_000.predict'.
    # Pipeline: shuffle -> TruncatedSVD(400) -> Normalizer -> MLP(600,600,sigmoid out).
    # Imports are kept function-local: this function is presumably run in a
    # worker process and must not pay the theano import cost at module load —
    # TODO confirm against caller.
    import os
    import numpy as np
    import random
    import data_converter
    from sklearn import preprocessing, feature_selection, decomposition
    from sklearn.utils import shuffle
    import time
    from sklearn.externals import joblib
    from scipy import sparse
    from lasagne import layers
    from lasagne.updates import nesterov_momentum
    from lasagne.updates import norm_constraint
    import lasagne
    import theano
    import theano.tensor as T
    from lasagne.regularization import regularize_layer_params, regularize_layer_params_weighted, l2, l1
    # fixed seeds for reproducible SVD/shuffle/weight init
    np.random.seed(0)
    random.seed(0)
    # shuffle once up front so train batches are decorrelated; note this
    # mutates LD.data in place
    LD.data['X_train'], LD.data['Y_train'] = shuffle(LD.data['X_train'], LD.data['Y_train'] , random_state=1)
    X_train = LD.data['X_train']
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']
    # dimensionality reduction fitted on train only, applied to all splits
    fs = decomposition.TruncatedSVD(n_components=400, n_iter=5, random_state=1)
    fs.fit(X_train)
    X_train = fs.transform(X_train)
    X_valid = fs.transform(X_valid)
    X_test = fs.transform(X_test)
    # per-sample L2 normalization
    normx = preprocessing.Normalizer()
    normx.fit(X_train)
    X_train = normx.transform(X_train)
    X_valid = normx.transform(X_valid)
    X_test = normx.transform(X_test)
    y_train = np.copy(LD.data['Y_train'])

    def batches(X, y, csize, rs):
        # Yield float32 minibatches of size csize after reshuffling with
        # seed rs; a trailing partial batch is dropped.
        X, y = shuffle(X, y, random_state=rs)
        for cstart in range(0, X.shape[0] - csize+1, csize):
            Xc = X[cstart:cstart+csize]
            yc = y[cstart:cstart+csize]
            Xc = np.float32(Xc)
            yc = np.float32(yc)
            yield Xc, yc

    input_var = T.matrix('inputs')
    target_var = T.matrix('targets')
    # NOTE(review): InputLayer takes no nonlinearity/W — these kwargs are
    # ignored by lasagne; confirm they were not meant for a DenseLayer.
    l_in = lasagne.layers.InputLayer(shape=(None, X_train.shape[1]), nonlinearity=lasagne.nonlinearities.rectify, W=lasagne.init.Sparse(), input_var=input_var)
    l_hid1 = lasagne.layers.DenseLayer( l_in, num_units= 600, nonlinearity=lasagne.nonlinearities.rectify, W=lasagne.init.Sparse() )
    l_hid2 = lasagne.layers.DenseLayer( l_hid1, num_units= 600, nonlinearity=lasagne.nonlinearities.rectify, W=lasagne.init.Sparse() )
    # one sigmoid output unit per target column
    Lnum_out_units = y_train.shape[1]
    l_out = lasagne.layers.DenseLayer( l_hid2, num_units=Lnum_out_units, nonlinearity=lasagne.nonlinearities.sigmoid)
    network = l_out
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.squared_error(prediction, target_var)
    loss = loss.mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.3, momentum=0.90)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    # NOTE(review): batch size is epoch+1, i.e. it grows 1..20 across
    # epochs (with a fresh shuffle seed per epoch) — looks deliberate as a
    # curriculum of increasing batches, but confirm.
    for epoch in range(20):
        train_err = 0
        train_batches = 0
        for batch in batches(X_train, y_train, epoch+1, epoch):
            Xt, yt = batch
            train_err += train_fn(Xt, yt)
            train_batches += 1
    # compile a deterministic inference function and predict in chunks of
    # 1000 to bound memory
    xml1 = T.matrix('xml1')
    Xlt1 = lasagne.layers.get_output(l_out, xml1, deterministic=True)
    f2 = theano.function([xml1], Xlt1)
    csize= 1000
    preds_valid = np.zeros((X_valid.shape[0], y_train.shape[1]))
    for cstart in range(0, X_valid.shape[0], csize):
        Xo = X_valid[cstart:cstart+csize]
        Xo = np.float32(Xo)
        pp = f2(Xo)
        preds_valid[cstart:cstart+csize] = pp
    preds_test = np.zeros((X_test.shape[0], y_train.shape[1]))
    for cstart in range(0, X_test.shape[0], csize):
        Xo = X_test[cstart:cstart+csize]
        Xo = np.float32(Xo)
        pp = f2(Xo)
        preds_test[cstart:cstart+csize] = pp
    import data_io
    # single-target tasks keep only the positive-class column
    # (assumes the label matrix then has >= 2 columns — TODO confirm)
    if LD.info['target_num'] == 1:
        preds_valid = preds_valid[:,1]
        preds_test = preds_test[:,1]
    # clip away exact 0/1 so downstream log-loss style metrics stay finite
    eps = 0.0001
    preds_valid = np.round(np.clip(preds_valid,0+eps,1-eps),4)
    preds_test = np.round(np.clip(preds_test,0+eps,1-eps),4)
    cycle = 0
    filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir,filename_valid), preds_valid)
    filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir,filename_test), preds_test)
def blender (sd, srd, srf, src, Nworkers, stop_writing, output_dir, basename, Lstart, Ltime_budget, train_split, test_split):
    # Python 2 ensembling loop (note the print statements): continuously
    # collects finished worker models from the shared state objects
    # (sd = worker pool, srd/srf/src = raw/extra model holders), scores the
    # best single model on the held-out fold, tries all 2- and 3-model
    # blends, and writes the best-so-far valid/test predictions to
    # output_dir. Runs until the process is killed; stop_writing (an Event)
    # suppresses further prediction files near the deadline.
    try:
        cycle = 0 #cycle 0 is all zeros
        # scores start below any real metric value so the first model wins
        best_score = -2
        atbest = -2
        while(1):
            try:
                time.sleep(0.5)
                temp_workers_data = []
                workers_data = []
                # collect every finished worker; exec is used because the
                # workers are stored as attributes sd.worker0..workerN-1
                for wr_no in range(Nworkers):
                    exec("wr_data = sd.worker"+str(wr_no))
                    if wr_data['done'] > 0:
                        temp_workers_data.append(wr_data)
                # keep the top two scorers per blend_group for diversity
                wgroups = [i['blend_group'] for i in temp_workers_data]
                for group in np.unique(wgroups):
                    twdata = [i for i in temp_workers_data if i['blend_group'] == group]
                    twdata = sorted(twdata, key=itemgetter('score'), reverse=True)
                    workers_data.append(twdata[0])
                    try:
                        workers_data.append(twdata[1])
                    except:
                        # group had a single member; nothing to add
                        pass
                # gather the raw/side models that have finished
                workers_data_raw = []
                raw0_data = srd.raw_model
                if raw0_data['done'] ==1:
                    workers_data_raw.append(raw0_data)
                raw1_data = srd.raw_model1
                if raw1_data['done'] ==1:
                    workers_data_raw.append(raw1_data)
                raw2_data = srd.raw_model2
                if raw2_data['done'] ==1:
                    workers_data_raw.append(raw2_data)
                raw3_data = srd.raw_model3
                if raw3_data['done'] ==1:
                    workers_data_raw.append(raw3_data)
                raw4_data = srd.raw_model4
                if raw4_data['done'] ==1:
                    workers_data_raw.append(raw4_data)
                raw5_data = srf.model1
                if raw5_data['done'] ==1:
                    workers_data_raw.append(raw5_data)
                raw6_data = src.model1
                if raw6_data['done'] ==1:
                    workers_data_raw.append(raw6_data)
                if len(workers_data_raw) > 0:
                    # append up to six raw models, best first; the bare
                    # excepts just stop when the list runs out
                    workers_data_raw = sorted(workers_data_raw, key=itemgetter('score'), reverse=True)
                    workers_data.append(workers_data_raw[0])
                    try:
                        workers_data.append(workers_data_raw[1])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[2])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[3])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[4])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[5])
                    except:
                        pass
                workers_data = sorted(workers_data, key=itemgetter('score'), reverse=True)
                if len(workers_data) > 0:
                    worker0 = workers_data[0]
                    preds_valid = worker0['preds_valid']
                    preds_test = worker0['preds_test']
                    # score the single best model on the second CV fold;
                    # the metric name comes from the task info, hence exec
                    y = sd.yt_raw[test_split:]
                    x = worker0['preds_2fld']
                    exec('s0 = libscores.'+ sd.LD.info['metric'] + '(y, x, "' + sd.LD.info['task'] + '")')
                    try:
                        # degenerate score: fall back to a fraction of AUC
                        # so a usable model is still preferred
                        if sd.LD.info['task'] != 'regression' and s0 <= 0:
                            exec('CVscore_auc = libscores.auc_metric(sd.yt_raw[test_split:], preds, "' + sd.LD.info['task'] + '")')
                            s0 += CVscore_auc/10
                    except:
                        pass
                    best_score = s0
                    try:
                        # write out immediately if the single model already
                        # beats everything seen so far
                        if s0 > atbest:
                            atbest = best_score
                            if sd.LD.info['target_num'] == 1:
                                if sd.LD.info['task'] != 'regression':
                                    # keep only positive-class column
                                    preds_valid = preds_valid[:,1]
                                    preds_test = preds_test[:,1]
                            if sd.LD.info['task'] != 'regression':
                                preds_valid = np.clip(preds_valid,0,1)
                                preds_test = np.clip(preds_test,0,1)
                            filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
                            data_io.write(os.path.join(output_dir,filename_valid), preds_valid)
                            filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
                            data_io.write(os.path.join(output_dir,filename_test), preds_test)
                    except:
                        pass
                    # exhaustive pairwise and triple blends over the top
                    # few models; keep whichever combination scores best
                    Lsample = 4
                    Lssample = Lsample - 1
                    for iter_worker in itertools.combinations(workers_data[:Lsample], 2):
                        worker0 = iter_worker[0]
                        worker1 = iter_worker[1]
                        s01, validt, testt = blend2(worker0['preds_2fld'],worker1['preds_2fld'],y, sd.LD.info['metric'] , sd.LD.info['task'], worker0['preds_valid'], worker1['preds_valid'], worker0['preds_test'], worker1['preds_test'])
                        if s01 > best_score:
                            best_score = s01
                            preds_valid = validt
                            preds_test = testt
                    for iter_worker in itertools.combinations(workers_data[:Lssample], 3):
                        worker0 = iter_worker[0]
                        worker1 = iter_worker[1]
                        worker2 = iter_worker[2]
                        s012, validt, testt = blend3(worker0['preds_2fld'],worker1['preds_2fld'],worker2['preds_2fld'],y, sd.LD.info['metric'] , sd.LD.info['task'], worker0['preds_valid'], worker1['preds_valid'], worker2['preds_valid'], worker0['preds_test'], worker1['preds_test'], worker2['preds_test'])
                        if s012 > best_score:
                            best_score = s012
                            preds_valid = validt
                            preds_test = testt
                    # only write blended output while writing is allowed
                    if stop_writing.is_set() == False and best_score > atbest:
                        atbest = best_score
                        if sd.LD.info['target_num'] == 1:
                            if sd.LD.info['task'] != 'regression':
                                preds_valid = preds_valid[:,1]
                                preds_test = preds_test[:,1]
                        if sd.LD.info['task'] != 'regression':
                            preds_valid = np.clip(preds_valid,0,1)
                            preds_test = np.clip(preds_test,0,1)
                        filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
                        data_io.write(os.path.join(output_dir,filename_valid), preds_valid)
                        filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
                        data_io.write(os.path.join(output_dir,filename_test), preds_test)
                        #cycle += 1
            except Exception as e:
                print 'exception in blender process' + ' ' + str(e)
                # in case of any problem, let's try again
    except Exception as e:
        print 'exception in blender main process' + ' ' + str(e)
yselect = (np.arange(Y.shape[1])[na,:] == yselect)*1. R = nn.lrp(ypred*yselect) #compute first layer relvance for an arbitrarily selected class ''' #undo input normalization for digit drawing. get it back to range [0,1] per pixel x = (x + 1.) / 2. if not np == numpy: # np=cupy x = np.asnumpy(x) R = np.asnumpy(R) #render input and heatmap as rgb images digit = render.digit_to_rgb(x, scaling=3) hm = render.hm_to_rgb(R, X=x, scaling=3, sigma=2) digit_hm = render.save_image([digit, hm], '../heatmap.png') data_io.write(R, '../heatmap.npy') #display the image as written to file plt.imshow(digit_hm, interpolation='none') plt.axis('off') plt.show() #note that modules.Sequential allows for batch processing inputs if True: N = 256 t_start = time.time() x = X[:N, ...] y = nn.forward(x) R = nn.lrp(y) data_io.write(R, '../Rbatch.npy') print('Computation of {} heatmaps using {} in {:.3f}s'.format(
def predict (LD, output_dir, basename):
    # Python 2 (note `print "p5"`). Trains a small lasagne/theano MLP
    # regressor and writes valid/test predictions to output_dir as
    # '<basename>_{valid,test}_000.predict'. Features are the first 200 raw
    # columns concatenated with 100 PCA components of the first 2000
    # columns, then standardized.
    import os
    import numpy as np
    import random
    # seed before any sklearn/lasagne randomness for reproducibility
    np.random.seed(0)
    random.seed(0)
    import data_converter
    from sklearn import preprocessing, decomposition
    from sklearn.utils import shuffle
    import time
    from sklearn.externals import joblib
    from lasagne import layers
    from lasagne.updates import nesterov_momentum
    from lasagne.updates import norm_constraint, total_norm_constraint
    import lasagne
    import theano
    import theano.tensor as T
    from lasagne.regularization import regularize_layer_params, regularize_layer_params_weighted, l2, l1
    # in-place shuffle of the training split
    LD.data['X_train'], LD.data['Y_train'] = shuffle(LD.data['X_train'], LD.data['Y_train'] , random_state=1)
    X_train = LD.data['X_train']
    X_valid = LD.data['X_valid']
    X_test = LD.data['X_test']
    # keep the first 2000 columns and densify (inputs are sparse matrices —
    # .toarray() would fail otherwise)
    X_train = X_train[:, 0:2000]
    X_valid = X_valid[:, 0:2000]
    X_test = X_test[:, 0:2000]
    X_train = X_train.toarray()
    X_valid = X_valid.toarray()
    X_test = X_test.toarray()
    # PCA block fitted on train, applied to all splits
    fs = decomposition.PCA(n_components=100)
    fs.fit(X_train)
    X_train2 = fs.transform(X_train)
    X_valid2 = fs.transform(X_valid)
    X_test2 = fs.transform(X_test)
    # raw block: only the first 200 original columns
    X_train = X_train[:, 0:200]
    X_valid = X_valid[:, 0:200]
    X_test = X_test[:, 0:200]
    X_train = np.float32(X_train)
    X_valid = np.float32(X_valid)
    X_test = np.float32(X_test)
    # final features = [raw 200 | PCA 100]
    X_train = np.hstack([X_train, X_train2])
    X_valid = np.hstack([X_valid, X_valid2])
    X_test = np.hstack([X_test, X_test2])
    normx = preprocessing.StandardScaler()
    normx.fit(X_train)
    X_train = normx.transform(X_train)
    X_valid = normx.transform(X_valid)
    X_test = normx.transform(X_test)
    X_train = np.float32(X_train)
    X_valid = np.float32(X_valid)
    X_test = np.float32(X_test)
    print "p5"
    # single-column float32 regression target
    y_train = np.copy(LD.data['Y_train'])
    y_train = np.float32(y_train)
    y_train = y_train.reshape((-1, 1))

    def batches(X, y, csize, rs):
        # Yield minibatches of size csize after reshuffling with seed rs;
        # a trailing partial batch is dropped.
        X, y = shuffle(X, y, random_state=rs)
        for cstart in range(0, X.shape[0] - csize+1, csize):
            Xc = X[cstart:cstart+csize]
            yc = y[cstart:cstart+csize]
            yield Xc, yc

    input_var = T.matrix('inputs')
    target_var = T.matrix('targets')
    # NOTE(review): InputLayer takes no nonlinearity/W — these kwargs are
    # ignored by lasagne; confirm they were not meant for a DenseLayer.
    l_in = lasagne.layers.InputLayer(shape=(None, X_train.shape[1]), input_var=input_var, nonlinearity=None, W=lasagne.init.Sparse())
    l_hid1 = lasagne.layers.DenseLayer( l_in, num_units= 100, nonlinearity=lasagne.nonlinearities.sigmoid, W=lasagne.init.Sparse())
    l_hid2 = lasagne.layers.DenseLayer( l_hid1, num_units= 40, nonlinearity=lasagne.nonlinearities.tanh, W=lasagne.init.GlorotUniform() )
    # linear single-unit output: plain regression head
    Lnum_out_units = 1
    l_out = lasagne.layers.DenseLayer( l_hid2, num_units=Lnum_out_units, nonlinearity=None)
    network = l_out
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.squared_error(prediction, target_var)
    loss = loss.mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    # gradient clipping by total norm (100) before plain SGD
    all_grads = T.grad(loss, params)
    scaled_grads = total_norm_constraint(all_grads, 100)
    updates = lasagne.updates.sgd(scaled_grads, params, learning_rate=0.001)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    # 1200 epochs, batch size 100, fresh shuffle seed per epoch
    for epoch in range(1200):
        train_err = 0
        train_batches = 0
        for batch in batches(X_train, y_train, 100, epoch):
            Xt, yt = batch
            train_err += train_fn(Xt, yt)
            train_batches += 1
    # deterministic inference function over the whole matrices
    xml1 = T.matrix('xml1')
    Xlt1 = lasagne.layers.get_output(l_out, xml1, deterministic=True)
    f2 = theano.function([xml1], Xlt1)
    preds_valid = f2(X_valid).ravel()
    preds_test = f2(X_test).ravel()
    import data_io
    cycle = 0
    filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir,filename_valid), preds_valid)
    filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'
    data_io.write(os.path.join(output_dir,filename_test), preds_test)
def blender(sd, srd, srf, src, Nworkers, stop_writing, output_dir, basename,
            Lstart, Ltime_budget, train_split, test_split):
    # Python 2 ensembling loop (note the print statements). Same algorithm
    # as the unformatted variant elsewhere in this codebase: poll finished
    # worker models, score the best one on the held-out fold, try all 2-
    # and 3-model blends of the top candidates, and write the best-so-far
    # valid/test predictions. Loops forever; the stop_writing Event gates
    # the final prediction files near the deadline.
    try:
        cycle = 0  #cycle 0 is all zeros
        # sentinel scores below any real metric value
        best_score = -2
        atbest = -2
        while (1):
            try:
                time.sleep(0.5)
                temp_workers_data = []
                workers_data = []
                # workers live as attributes sd.worker0..workerN-1, hence exec
                for wr_no in range(Nworkers):
                    exec("wr_data = sd.worker" + str(wr_no))
                    if wr_data['done'] > 0:
                        temp_workers_data.append(wr_data)
                # keep the top two scorers of each blend_group for diversity
                wgroups = [i['blend_group'] for i in temp_workers_data]
                for group in np.unique(wgroups):
                    twdata = [
                        i for i in temp_workers_data
                        if i['blend_group'] == group
                    ]
                    twdata = sorted(twdata,
                                    key=itemgetter('score'),
                                    reverse=True)
                    workers_data.append(twdata[0])
                    try:
                        workers_data.append(twdata[1])
                    except:
                        # single-member group
                        pass
                # gather whichever raw/side models have finished
                workers_data_raw = []
                raw0_data = srd.raw_model
                if raw0_data['done'] == 1:
                    workers_data_raw.append(raw0_data)
                raw1_data = srd.raw_model1
                if raw1_data['done'] == 1:
                    workers_data_raw.append(raw1_data)
                raw2_data = srd.raw_model2
                if raw2_data['done'] == 1:
                    workers_data_raw.append(raw2_data)
                raw3_data = srd.raw_model3
                if raw3_data['done'] == 1:
                    workers_data_raw.append(raw3_data)
                raw4_data = srd.raw_model4
                if raw4_data['done'] == 1:
                    workers_data_raw.append(raw4_data)
                raw5_data = srf.model1
                if raw5_data['done'] == 1:
                    workers_data_raw.append(raw5_data)
                raw6_data = src.model1
                if raw6_data['done'] == 1:
                    workers_data_raw.append(raw6_data)
                if len(workers_data_raw) > 0:
                    # append up to six raw models best-first; bare excepts
                    # simply stop when the list runs out
                    workers_data_raw = sorted(workers_data_raw,
                                              key=itemgetter('score'),
                                              reverse=True)
                    workers_data.append(workers_data_raw[0])
                    try:
                        workers_data.append(workers_data_raw[1])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[2])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[3])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[4])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[5])
                    except:
                        pass
                workers_data = sorted(workers_data,
                                      key=itemgetter('score'),
                                      reverse=True)
                if len(workers_data) > 0:
                    worker0 = workers_data[0]
                    preds_valid = worker0['preds_valid']
                    preds_test = worker0['preds_test']
                    # score the best single model on the second fold; the
                    # metric function name is data-driven, hence exec
                    y = sd.yt_raw[test_split:]
                    x = worker0['preds_2fld']
                    exec('s0 = libscores.' + sd.LD.info['metric'] +
                         '(y, x, "' + sd.LD.info['task'] + '")')
                    try:
                        # degenerate score: fall back to a fraction of AUC
                        if sd.LD.info['task'] != 'regression' and s0 <= 0:
                            exec(
                                'CVscore_auc = libscores.auc_metric(sd.yt_raw[test_split:], preds, "'
                                + sd.LD.info['task'] + '")')
                            s0 += CVscore_auc / 10
                    except:
                        pass
                    best_score = s0
                    try:
                        # write out immediately if the single model already
                        # beats everything seen so far
                        if s0 > atbest:
                            atbest = best_score
                            if sd.LD.info['target_num'] == 1:
                                if sd.LD.info['task'] != 'regression':
                                    # keep only the positive-class column
                                    preds_valid = preds_valid[:, 1]
                                    preds_test = preds_test[:, 1]
                            if sd.LD.info['task'] != 'regression':
                                preds_valid = np.clip(preds_valid, 0, 1)
                                preds_test = np.clip(preds_test, 0, 1)
                            filename_valid = basename + '_valid_' + str(
                                cycle).zfill(3) + '.predict'
                            data_io.write(
                                os.path.join(output_dir, filename_valid),
                                preds_valid)
                            filename_test = basename + '_test_' + str(
                                cycle).zfill(3) + '.predict'
                            data_io.write(
                                os.path.join(output_dir, filename_test),
                                preds_test)
                    except:
                        pass
                    # exhaustive pairwise and triple blends over the top few
                    Lsample = 4
                    Lssample = Lsample - 1
                    for iter_worker in itertools.combinations(
                            workers_data[:Lsample], 2):
                        worker0 = iter_worker[0]
                        worker1 = iter_worker[1]
                        s01, validt, testt = blend2(
                            worker0['preds_2fld'], worker1['preds_2fld'], y,
                            sd.LD.info['metric'], sd.LD.info['task'],
                            worker0['preds_valid'], worker1['preds_valid'],
                            worker0['preds_test'], worker1['preds_test'])
                        if s01 > best_score:
                            best_score = s01
                            preds_valid = validt
                            preds_test = testt
                    for iter_worker in itertools.combinations(
                            workers_data[:Lssample], 3):
                        worker0 = iter_worker[0]
                        worker1 = iter_worker[1]
                        worker2 = iter_worker[2]
                        s012, validt, testt = blend3(
                            worker0['preds_2fld'], worker1['preds_2fld'],
                            worker2['preds_2fld'], y, sd.LD.info['metric'],
                            sd.LD.info['task'], worker0['preds_valid'],
                            worker1['preds_valid'], worker2['preds_valid'],
                            worker0['preds_test'], worker1['preds_test'],
                            worker2['preds_test'])
                        if s012 > best_score:
                            best_score = s012
                            preds_valid = validt
                            preds_test = testt
                    # blended output only while writing is still allowed
                    if stop_writing.is_set() == False and best_score > atbest:
                        atbest = best_score
                        if sd.LD.info['target_num'] == 1:
                            if sd.LD.info['task'] != 'regression':
                                preds_valid = preds_valid[:, 1]
                                preds_test = preds_test[:, 1]
                        if sd.LD.info['task'] != 'regression':
                            preds_valid = np.clip(preds_valid, 0, 1)
                            preds_test = np.clip(preds_test, 0, 1)
                        filename_valid = basename + '_valid_' + str(
                            cycle).zfill(3) + '.predict'
                        data_io.write(os.path.join(output_dir,
                                                   filename_valid),
                                      preds_valid)
                        filename_test = basename + '_test_' + str(cycle).zfill(
                            3) + '.predict'
                        data_io.write(os.path.join(output_dir, filename_test),
                                      preds_test)
                        #cycle += 1
            except Exception as e:
                print 'exception in blender process' + ' ' + str(e)
                # in case of any problem, let's try again
    except Exception as e:
        print 'exception in blender main process' + ' ' + str(e)
def blender(sd, srd, Nworkers, stop_writing, output_dir, basename, Lstart,
            Ltime_budget, Lfold):
    # Python 2 (print statements). Older blender variant: only sd/srd model
    # sources, metric call without a task argument, and a throttle that
    # spreads at most ~100 prediction files across the time budget.
    # Polls finished models, scores the best, tries 2- and 3-model blends,
    # and writes improving valid/test predictions until killed.
    try:
        # holdout boundary: second half of the training labels
        split = int(len(sd.LD.data["Y_train"]) * 0.5)
        cycle = 1  # cycle 0 is all zeros
        best_score = 0
        atbest = 0
        while 1:
            try:
                time.sleep(0.5)
                # limit to 100 predictions
                # throttle: don't emit prediction files faster than the
                # elapsed fraction of the budget allows
                if cycle > (time.time() - Lstart) / Ltime_budget * 100:
                    time.sleep(1)
                    continue
                temp_workers_data = []
                workers_data = []
                # workers live as attributes sd.worker0..workerN-1, hence exec
                for wr_no in range(Nworkers):
                    exec ("wr_data = sd.worker" + str(wr_no))
                    if wr_data["done"] > 0:
                        temp_workers_data.append(wr_data)
                # keep the top two scorers of each blend_group for diversity
                wgroups = [i["blend_group"] for i in temp_workers_data]
                for group in np.unique(wgroups):
                    twdata = [i for i in temp_workers_data if i["blend_group"] == group]
                    twdata = sorted(twdata, key=itemgetter("score"), reverse=True)
                    workers_data.append(twdata[0])
                    try:
                        workers_data.append(twdata[1])
                    except:
                        # single-member group
                        pass
                    print group, len(twdata), len(workers_data)
                # this is patch for codalab VM
                workers_data_raw = []
                raw0_data = srd.raw_model
                if raw0_data["done"] == 1:
                    workers_data_raw.append(raw0_data)
                raw1_data = srd.raw_model1
                if raw1_data["done"] == 1:
                    workers_data_raw.append(raw1_data)
                raw2_data = srd.raw_model2
                if raw2_data["done"] == 1:
                    workers_data_raw.append(raw2_data)
                raw3_data = srd.raw_model3
                if raw3_data["done"] == 1:
                    workers_data_raw.append(raw3_data)
                raw4_data = srd.raw_model4
                if raw4_data["done"] == 1:
                    workers_data_raw.append(raw4_data)
                if len(workers_data_raw) > 0:
                    # append up to three raw models, best first
                    workers_data_raw = sorted(workers_data_raw, key=itemgetter("score"), reverse=True)
                    workers_data.append(workers_data_raw[0])
                    try:
                        workers_data.append(workers_data_raw[1])
                    except:
                        pass
                    try:
                        workers_data.append(workers_data_raw[2])
                    except:
                        pass
                workers_data = sorted(workers_data, key=itemgetter("score"), reverse=True)
                if len(workers_data) > 0:
                    worker0 = workers_data[0]
                    preds_valid = worker0["preds_valid"]
                    preds_test = worker0["preds_test"]
                    # multi-fold CV scores against the full label vector,
                    # otherwise against the held-out second half
                    y = sd.yt_raw[split:]
                    if Lfold > 1:
                        y = sd.yt_raw
                    x = worker0["preds_2fld"]
                    # metric function name is data-driven, hence exec
                    exec ("s0 = libscores." + sd.LD.info["metric"] + "(y, x)")
                    best_score = s0
                    # short run can't wait for blend (usable only for AutoML 1)
                    try:
                        if s0 > atbest and cycle < 2:
                            atbest = best_score * 0.9  # not reilable score
                            if sd.LD.info["target_num"] == 1:
                                # keep only the positive-class column
                                preds_valid = preds_valid[:, 1]
                                preds_test = preds_test[:, 1]
                            preds_valid = np.clip(preds_valid, 0, 1)
                            preds_test = np.clip(preds_test, 0, 1)
                            filename_valid = basename + "_valid_" + str(cycle).zfill(3) + ".predict"
                            data_io.write(os.path.join(output_dir, filename_valid), preds_valid)
                            filename_test = basename + "_test_" + str(cycle).zfill(3) + ".predict"
                            data_io.write(os.path.join(output_dir, filename_test), preds_test)
                            cycle += 1
                    except:
                        pass
                    # blend over more candidates when more CV folds exist
                    if Lfold < 4:
                        Lsample = 4
                    else:
                        Lsample = 6
                    xa = 0
                    Lssample = Lsample - 1
                    for iter_worker in itertools.combinations(workers_data[:Lsample], 2):
                        xa = xa + 1
                        worker0 = iter_worker[0]
                        worker1 = iter_worker[1]
                        s01, validt, testt = blend2(
                            worker0["preds_2fld"],
                            worker1["preds_2fld"],
                            y,
                            sd.LD.info["metric"],
                            worker0["preds_valid"],
                            worker1["preds_valid"],
                            worker0["preds_test"],
                            worker1["preds_test"],
                        )
                        if s01 > best_score:
                            best_score = s01
                            preds_valid = validt
                            preds_test = testt
                    xa = 0
                    for iter_worker in itertools.combinations(workers_data[:Lssample], 3):
                        xa = xa + 1
                        worker0 = iter_worker[0]
                        worker1 = iter_worker[1]
                        worker2 = iter_worker[2]
                        s012, validt, testt = blend3(
                            worker0["preds_2fld"],
                            worker1["preds_2fld"],
                            worker2["preds_2fld"],
                            y,
                            sd.LD.info["metric"],
                            worker0["preds_valid"],
                            worker1["preds_valid"],
                            worker2["preds_valid"],
                            worker0["preds_test"],
                            worker1["preds_test"],
                            worker2["preds_test"],
                        )
                        if s012 > best_score:
                            best_score = s012
                            preds_valid = validt
                            preds_test = testt
                    if stop_writing.is_set() == False:
                        # until last 10 seconds (event signal)
                        if best_score > atbest:
                            atbest = best_score
                            print "naj =", workers_data[0]["score"], best_score, atbest
                            if sd.LD.info["target_num"] == 1:
                                preds_valid = preds_valid[:, 1]
                                preds_test = preds_test[:, 1]
                            preds_valid = np.clip(preds_valid, 0, 1)
                            preds_test = np.clip(preds_test, 0, 1)
                            filename_valid = basename + "_valid_" + str(cycle).zfill(3) + ".predict"
                            data_io.write(os.path.join(output_dir, filename_valid), preds_valid)
                            filename_test = basename + "_test_" + str(cycle).zfill(3) + ".predict"
                            data_io.write(os.path.join(output_dir, filename_test), preds_test)
                            cycle += 1
                    else:
                        print "stop writing is set"
            except Exception as e:
                print "exception in blender process" + " " + str(e)
                # in case of any problem, let's try again
    except Exception as e:
        print "exception in blender main process" + " " + str(e)
''' R = nn.lrp(Y[na,i]) #compute first layer relevance according to the true class label ''' ''' yselect = 3 yselect = (np.arange(Y.shape[1])[na,:] == yselect)*1. R = nn.lrp(yselect) #compute first layer relvance for an arbitrarily selected class ''' #render input and heatmap as rgb images digit = render.digit_to_rgb(x, scaling = 3) hm = render.hm_to_rgb(R, X = x, scaling = 3, sigma = 2) digit_hm = render.save_image([digit,hm],'../heatmap.png') data_io.write(R,'../heatmap.npy') #display the image as written to file plt.imshow(digit_hm, interpolation = 'none') plt.axis('off') plt.show() #note that modules.Sequential allows for batch processing inputs ''' x = X[:10,:] y = nn.forward(x) R = nn.lrp(y) data_io.write(R,'../Rbatch.npy') '''
def lrp(self,R,lrp_var=None,param=None):
    '''
    Performs LRP by calling subroutines, depending on lrp_var and param or
    preset values specified via Module.set_lrp_parameters(lrp_var,lrp_param).

    If lrp parameters have been pre-specified (per layer), the corresponding
    decomposition will be applied during a call of lrp(). Specifying lrp
    parameters explicitly when calling lrp(), e.g.
    net.lrp(R, lrp_var='alpha', param=2.), will override the preset values
    for the current call.

    How to use:

        net.forward(X)  # forward-feed the data you wish to explain, to
                        # populate the net's temporary variables.

    then either:

        net.lrp()       # naive approach to lrp implemented in _simple_lrp
                        # for each layer

    or:

        for m in net.modules: m.set_lrp_parameters(...)
        net.lrp()       # preset an lrp configuration per layer

    or:

        net.lrp(somevariantname, someparameter)
                        # explicitly call the specified parametrization for
                        # all layers (where applicable), overriding presets.

    Parameters
    ----------
    R : numpy.ndarray
        Final-layer relevance values, usually the network's prediction for
        some data points; dimensionality must equal that of the previously
        computed predictions.
    lrp_var : str
        Either 'none' / 'simple' / None for standard LRP,
        'epsilon' for an added epsilon slack in the denominator,
        'alphabeta' or 'alpha' for weighting positive and negative
        contributions separately (param specifies alpha, alpha + beta = 1),
        'flat' to project an upper-layer neuron's relevance uniformly over
        its receptive field,
        'ww' or 'w^2' to distribute relevance by squared weights only.
    param : double
        The respective parameter for the chosen lrp method.

    Returns
    -------
    R : numpy.ndarray
        First-layer relevances produced by the net w.r.t. the previously
        forward-passed input data; dimensionality equals that input's.

    Note
    ----
    Requires the net to be populated with temporary variables, i.e. forward
    must have been called with the input for which the explanation is to be
    computed. Calling clean() between forward and lrp invalidates the
    temporary data.
    '''
    for m in self.modules[::-1]:
        # Back-propagate relevance through this layer. Explicit lrp_var /
        # param override any per-layer presets for this call.
        R = m.lrp(R, lrp_var, param)
        # Snapshot intermediate relevance maps for known layer output sizes.
        # NOTE(review): the magic sizes (32/28/14/5/100) appear tied to one
        # specific network architecture -- confirm against the model setup.
        ndim = len(R.shape)
        # FIX: guard every shape index with the array rank. The original
        # code indexed R.shape[1] (and R.shape[3] in the last branch)
        # unconditionally, which raises IndexError for lower-rank arrays
        # (e.g. a 2-D relevance matrix whose second dimension is 1).
        if ndim >= 2 and R.shape[0] == 1 and R.shape[1] == 32:
            data_io.write(R, '../r_array/{0}.npy'.format("norm"))
        elif ndim >= 2 and R.shape[0] == 1 and R.shape[1] == 28:
            data_io.write(R, '../r_array/{0}.npy'.format("first"))
        elif ndim >= 2 and R.shape[0] == 1 and R.shape[1] == 14:
            data_io.write(R, '../r_array/{0}.npy'.format("sec"))
        elif ndim >= 2 and R.shape[0] == 1 and R.shape[1] == 5:
            data_io.write(R, '../r_array/{0}.npy'.format("thrd"))
        elif ndim >= 4 and R.shape[1] == 1 and R.shape[3] == 100:
            data_io.write(R, '../r_array/{0}.npy'.format("foth"))
    # Always persist the fully back-propagated (first-layer) relevances.
    data_io.write(R, '../r_array/sequential.npy')
    return R
# Make predictions # ----------------- Y_valid = M.predict_proba(D.data['X_valid']) Y_test = M.predict_proba(D.data['X_test']) vprint( verbose, "[+] Prediction success, time spent so far %5.2f sec" % (time.time() - start)) # Write results # ------------- if overwrite_output: filename_valid = basename + '_valid.predict' filename_test = basename + '_test.predict' else: filename_valid = basename + '_valid_' + the_date + '.predict' filename_test = basename + '_test_' + the_date + '.predict' vprint( verbose, "======== Saving results to: " + output_dir) data_io.write(os.path.join(output_dir,filename_valid), Y_valid) data_io.write(os.path.join(output_dir,filename_test), Y_test) vprint( verbose, "[+] Results saved, time spent so far %5.2f sec" % (time.time() - start)) time_spent = time.time() - start time_left_over = time_budget - time_spent vprint( verbose, "[+] End cycle, time left %5.2f sec" % time_left_over) if time_left_over<=0: break # Clean up del D del M gc.collect() if zipme: vprint( verbose, "========= Zipping this directory to prepare for submit ==============") data_io.zipdir(submission_filename + '.zip', ".") vprint( verbose, "See: " + submission_filename + '.zip')
# --- Path setup so the competition helper modules become importable ---
model_dir = 'sample_code_submission/'
problem_dir = 'ingestion_program/'
score_dir = 'scoring_program/'
from sys import path
path.append(model_dir)
path.append(problem_dir)
path.append(score_dir)
import numpy as np
# --- Load the challenge data ---
datadir = '../public_data'
from data_manager import DataManager
# NOTE(review): `dataname` and `Model` must be defined earlier in the file;
# they are not bound anywhere in this fragment.
D = DataManager(dataname, datadir, replace_missing=True)
X_train = D.data['X_train']
Y_train = D.data['Y_train']
# --- Fit the model within the given time budget (seconds) ---
model = Model(time_left_for_this_task=1200) # Change the time budget!!!!
model.fit(X_train, Y_train)
# --- Predict on the held-out splits ---
Y_hat_valid = model.predict(D.data['X_valid'])
Y_hat_test = model.predict(D.data['X_test'])
# --- Write predictions in the expected submission format ---
result_name = 'sample_result_submission/' + dataname
from data_io import write
write(result_name + '_valid.predict', Y_hat_valid)
write(result_name + '_test.predict', Y_hat_test)
# --- Zip the result directory for upload ---
from subprocess import call
call(["zip", "-rj", "autosklearn", "sample_result_submission/"])