'_' + str(hidden_layer_sizes)[1:-1].replace(', ','_') + \
    '_' + activation

# Train the MLP regressor; the timer covers the initial fit and both
# fine-tuning passes below.
start_time = time.time()
regr = MLPRegressor(random_state=RANDOM_STATE,
                    hidden_layer_sizes=hidden_layer_sizes,
                    activation=activation,
                    solver=solver,
                    alpha=alpha,
                    learning_rate=learning_rate,
                    verbose=True)
regr.fit(X_train_scaled_DR_scaled, Y_train_scaled)

# fine tune the model: keep the learned weights (warm_start) and refit twice,
# dividing the initial learning rate by 10 before each pass
regr.warm_start = True
for _ in range(2):
    regr.learning_rate_init /= 10  # default 0.001
    regr.fit(X_train_scaled_DR_scaled, Y_train_scaled)
print("--- %s seconds ---" % (time.time() - start_time))

# save model
dir_model = '../models/' + cheap_node_id + '/'
dir_model += 'whole/'
# makedirs creates every missing level in one call and tolerates an existing
# directory, so there is no check-then-create race and a missing '../models/'
# parent no longer makes the script fail.
os.makedirs(dir_model, exist_ok=True)
def _load_sorted_xy(descriptors_filename, target_values_filename):
    """Read the descriptor and target CSV files and return (x, y) ordered by CID.

    The descriptor file must have a 'CID' column; the feature vector of a row
    is taken as every column after the first one (this assumes 'CID' is the
    first column -- TODO confirm against the data files). The target file must
    have 'CID' and 'a' columns.

    Writes an error to stderr and exits with status 1 if a CID of the
    descriptor file has no entry in the target file.
    """
    fv = pd.read_csv(descriptors_filename)
    value = pd.read_csv(target_values_filename)

    cids = np.array(fv['CID'])

    # CID -> feature vector, and CID -> target value
    fv_dict = dict(zip(cids, fv.values[:, 1:]))
    target_dict = dict(zip(np.array(value['CID']), np.array(value['a'])))

    # the target file should contain all CIDs that appear in the descriptor file
    for cid in cids:
        if cid not in target_dict:
            sys.stderr.write('error: {} misses the target value of CID {}\n'.format(target_values_filename, cid))
            # sys.exit instead of the site-provided exit(), which is not
            # guaranteed to exist (e.g. under `python -S`)
            sys.exit(1)

    # construct x and y so that the CIDs are ordered in ascending order
    cids.sort()
    x = np.array([fv_dict[cid] for cid in cids])
    y = np.array([target_dict[cid] for cid in cids])
    return x, y


def train_ANN(descriptors_filename, target_values_filename, architecture,
              ANN_seed, split_seed, T):
    """Cross-validate an MLP regressor and record statistics at checkpoints.

    The data is split into CV folds (module-level ``CV``) with a shuffled
    KFold. For every fold a single MLPRegressor is refit once per entry of
    ``T`` with warm start enabled after the first fit, and scores are stored
    after each fit.

    Args:
        descriptors_filename: CSV file with a 'CID' column followed by the
            descriptor columns.
        target_values_filename: CSV file with 'CID' and 'a' (target) columns.
        architecture: passed to MLPRegressor as ``hidden_layer_sizes``.
        ANN_seed: passed to MLPRegressor as ``random_state``.
        split_seed: passed to KFold as ``random_state``.
        T: iterable of values assigned in turn to ``reg.max_iter``; each value
            is also used as a dictionary key of the result.

    Returns:
        A dict ``R`` with ``R[key][fold][t]`` for every key in the module-level
        ``R_key``. This function fills the keys "R2train", "R2test", "R2all",
        "MAEtrain", "MAEtest", "MAEall", "time" (seconds since the fold
        started) and "reg" (a deep copy of the regressor at that checkpoint).

    Raises:
        SystemExit: with status 1 if a CID of the descriptor file has no
            target value.
    """
    ########## preprocess ##########
    x, y = _load_sorted_xy(descriptors_filename, target_values_filename)

    ### prepare learning ###
    # initialize an ANN - MLP regressor
    reg = MLPRegressor(activation='relu', solver='adam',
                       alpha=1e-5, hidden_layer_sizes=architecture,
                       random_state=ANN_seed, early_stopping=False)

    # initialize the result container: R[<key>][fold][t]
    R = {key: [dict() for _ in range(CV)] for key in R_key}

    # separate the data randomly for cross-validation
    kf = KFold(n_splits=CV, shuffle=True, random_state=split_seed)

    ### start learning experiments ###
    for fold, (train, test) in enumerate(kf.split(x)):
        x_train, x_test = x[train], x[test]
        y_train, y_test = y[train], y[test]

        # the first fit of every fold must start from fresh weights
        reg.warm_start = False
        start = time.time()
        print("\n\n### (ANN_seed, split_seed)=({},{}), fold={}/{} ###".format(ANN_seed, split_seed, fold+1, CV))
        print("# t\ttrain\ttest\ttime")

        # learn ANN, but pause the learning at each t in order to record stats
        # NOTE(review): with warm_start=True scikit-learn appears to treat
        # max_iter as a per-call budget, so the cumulative number of
        # iterations at checkpoint t may exceed t -- confirm against the
        # MLPRegressor documentation before relying on t as an iteration count.
        for t in T:
            reg.max_iter = t
            reg.fit(x_train, y_train)
            reg.warm_start = True

            # obtain the predictions to compute MAE
            pred = reg.predict(x)
            pred_train = reg.predict(x_train)
            pred_test = reg.predict(x_test)

            # calculate the prediction score (R^2)
            R["R2train"][fold][t] = reg.score(x_train, y_train)
            R["R2test"][fold][t] = reg.score(x_test, y_test)
            R["R2all"][fold][t] = reg.score(x, y)

            # calculate MAE
            R["MAEtrain"][fold][t] = mean_absolute_error(y_train, pred_train)
            R["MAEtest"][fold][t] = mean_absolute_error(y_test, pred_test)
            R["MAEall"][fold][t] = mean_absolute_error(y, pred)

            # store elapsed time and a snapshot of the regressor
            R["time"][fold][t] = time.time() - start
            R["reg"][fold][t] = copy.deepcopy(reg)

            print("{}\t{:.4f}\t{:.4f}\t{:.4f}".format(t, R["R2train"][fold][t], R["R2test"][fold][t], R["time"][fold][t]))

    return R